-- $Id: biosqldb-pg.sql 286 2008-08-01 04:17:56Z lapp $
--
-- Copyright 2002-2003 Ewan Birney, Elia Stupka, Chris Mungall
-- Copyright 2003-2008 Hilmar Lapp
--
-- This file is part of BioSQL.
--
-- BioSQL is free software: you can redistribute it and/or modify it
-- under the terms of the GNU Lesser General Public License as
-- published by the Free Software Foundation, either version 3 of the
-- License, or (at your option) any later version.
--
-- BioSQL is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- GNU Lesser General Public License for more details.
--
-- You should have received a copy of the GNU Lesser General Public License
-- along with BioSQL. If not, see .
--
-- ========================================================================
--
-- Authors: Ewan Birney, Elia Stupka, Hilmar Lapp, Aaron Mackey
-- Post-Cape Town changes by Hilmar Lapp.
-- Singapore changes by Hilmar Lapp and Aaron Mackey.
--
-- conventions:
-- _id is primary internal id (usually autogenerated)
--
-- comments to biosql - biosql-l@open-bio.org
--
-- Note that some aspects of the schema like uniqueness constraints
-- may be changed to best suit your requirements. Search for the tag
-- CONFIG and read the documentation you find there.
--
-- CONFIG: PostgreSQL supports 'schemas' since v7.4. A schema here is
-- essentially a namespace for a collection of tables (and other
-- database objects such as views, indexes, etc) within a 'database.'
-- Bioperl-db loading scripts (load_seqdatabase.pl, load_ontology.pl,
-- etc) do support specifying a schema, as does load_ncbi_taxonomy.pl.
-- If you want to use Biosql with a specific schema uncomment this:
-- CREATE SCHEMA biosql;
-- SET search_path to biosql;
--
-- The Biosql database has bioentries. That is about it.
-- We do not store different versions of a database as different dbids
-- (there is no concept of versions of database). There is a concept of
-- versions of entries. Versions of databases deserve their own table and
-- join to bioentry table for tracking with versions of entries
--
-- If you want to use Biosql with a specific schema uncomment this:
-- CREATE SCHEMA biosql;
-- SET search_path to biosql;
CREATE SEQUENCE biodatabase_pk_seq;
CREATE TABLE biodatabase (
biodatabase_id INTEGER DEFAULT nextval ( 'biodatabase_pk_seq' ) NOT NULL ,
name VARCHAR ( 128 ) NOT NULL ,
authority VARCHAR ( 128 ) ,
description TEXT ,
PRIMARY KEY ( biodatabase_id ) ,
UNIQUE ( name ) ) ;
CREATE INDEX db_auth on biodatabase ( authority );
-- we could insist that taxa are NCBI taxon id, but on reflection I made this
-- an optional extra line, as many flat file formats do not have the NCBI id
--
-- no organelle/sub species
--
-- this corresponds to the node table of the NCBI taxonomy database
-- left_value, right_value implement a nested sets model;
-- see http://www.oreillynet.com/pub/a/network/2002/11/27/bioconf.html
-- or Joe Celko's 'SQL for smarties' for more information.
CREATE SEQUENCE taxon_pk_seq;
CREATE TABLE taxon (
taxon_id INTEGER DEFAULT nextval ( 'taxon_pk_seq' ) NOT NULL ,
ncbi_taxon_id INTEGER ,
parent_taxon_id INTEGER ,
node_rank VARCHAR ( 32 ) ,
genetic_code SMALLINT ,
mito_genetic_code SMALLINT ,
left_value INTEGER ,
right_value INTEGER ,
PRIMARY KEY ( taxon_id ) ,
CONSTRAINT XAKtaxon_ncbi_taxon_id UNIQUE ( ncbi_taxon_id ) ,
CONSTRAINT XAKtaxon_left_value UNIQUE ( left_value ) ,
CONSTRAINT XAKtaxon_right_value UNIQUE ( right_value ) ) ;
CREATE INDEX taxparent ON taxon ( parent_taxon_id );
-- corresponds to the names table of the NCBI taxonomy databaase
CREATE TABLE taxon_name (
taxon_id INTEGER NOT NULL ,
name VARCHAR ( 255 ) NOT NULL ,
name_class VARCHAR ( 32 ) NOT NULL ,
UNIQUE ( name , name_class, taxon_id ) ) ;
CREATE INDEX taxnametaxonid ON taxon_name ( taxon_id );
CREATE INDEX taxnamename ON taxon_name ( name );
-- this is the namespace (controlled vocabulary) ontology terms live in
-- we chose to have a separate table for this instead of reusing biodatabase
CREATE SEQUENCE ontology_pk_seq;
CREATE TABLE ontology (
ontology_id INTEGER DEFAULT nextval ( 'ontology_pk_seq' ) NOT NULL ,
name VARCHAR ( 32 ) NOT NULL ,
definition TEXT ,
PRIMARY KEY ( ontology_id ) ,
UNIQUE ( name ) ) ;
-- any controlled vocab term, everything from full ontology
-- terms eg GO IDs to the various keys allowed as qualifiers
CREATE SEQUENCE term_pk_seq;
CREATE TABLE term (
term_id INTEGER DEFAULT nextval ( 'term_pk_seq' ) NOT NULL ,
name VARCHAR ( 255 ) NOT NULL ,
definition TEXT ,
identifier VARCHAR ( 40 ) ,
is_obsolete CHAR ( 1 ) ,
ontology_id INTEGER NOT NULL ,
PRIMARY KEY ( term_id ) ,
-- CONFIG: uncomment exactly one of the two following lines. The
-- first one puts a unqiueness constraint on term name within an
-- ontology, which is a conservative approach. However, if you are
-- going to load GO and update it too, there are situations where
-- you'll run into problems with this constraint unless you delete
-- obsoleted terms (which has its own shortcomings, read the POD of
-- load_ontology.pl in bioperl-db). The second line includes the
-- obsoleteness into the uniqueness constraint.
-- UNIQUE (name,ontology_id),
UNIQUE ( name , ontology_id , is_obsolete ) ,
UNIQUE ( identifier ) ) ;
CREATE INDEX term_ont ON term ( ontology_id );
-- ontology terms have synonyms, here is how to store them.
-- Synonym is a reserved word in many RDBMSs, so the column synonym
-- may eventually be renamed to name.
CREATE TABLE term_synonym (
synonym VARCHAR(255) NOT NULL,
term_id INTEGER NOT NULL,
PRIMARY KEY ( term_id , synonym ) ) ;
-- ontology terms to dbxref association: ontology terms have dbxrefs
CREATE TABLE term_dbxref (
term_id INTEGER NOT NULL ,
dbxref_id INTEGER NOT NULL ,
rank INTEGER ,
PRIMARY KEY ( term_id , dbxref_id ) ) ;
CREATE INDEX trmdbxref_dbxrefid ON term_dbxref ( dbxref_id );
-- relationship between controlled vocabulary / ontology term
-- we use subject/predicate/object but this could also
-- be thought of as child/relationship-type/parent.
-- the subject/predicate/object naming is better as we
-- can think of the graph as composed of statements.
--
-- we also treat the relationshiptypes / predicates as
-- controlled terms in themselves; this is quite useful
-- as a lot of systems (eg GO) will soon require
-- ontologies of relationship types (eg subtle differences
-- in the partOf relationship)
--
-- this table probably won't be filled for a while, the core
-- will just treat ontologies as flat lists of terms
CREATE SEQUENCE term_relationship_pk_seq;
CREATE TABLE term_relationship (
term_relationship_id INTEGER DEFAULT nextval ( 'term_relationship_pk_seq' ) NOT NULL ,
subject_term_id INTEGER NOT NULL ,
predicate_term_id INTEGER NOT NULL ,
object_term_id INTEGER NOT NULL ,
ontology_id INTEGER NOT NULL ,
PRIMARY KEY ( term_relationship_id ) ,
UNIQUE ( subject_term_id , predicate_term_id , object_term_id , ontology_id ) ) ;
CREATE INDEX trmrel_predicateid ON term_relationship ( predicate_term_id );
CREATE INDEX trmrel_objectid ON term_relationship ( object_term_id );
CREATE INDEX trmrel_ontid ON term_relationship ( ontology_id );
-- CONFIG: you may want to add this if you can't get the optimizer to
-- use the composite index for the initial keys
--CREATE INDEX trmrel_subjectid ON term_relationship(subject_term_id);
-- This lets one associate a single term with a term_relationship
-- effecively allowing us to treat triples as 1st class terms.
--
-- At this point this table is only supported in Biojava. If you want
-- to know more about the rationale and idea behind it, read the
-- following article that Mat Pocock posted to the mailing list:
-- http://www.open-bio.org/pipermail/biosql-l/2003-October/000455.html
CREATE TABLE term_relationship_term (
term_relationship_id INTEGER NOT NULL,
term_id INTEGER NOT NULL,
PRIMARY KEY ( term_relationship_id ),
UNIQUE ( term_id )
);
-- the infamous transitive closure table on ontology term relationships
-- this is a warehouse approach - you will need to update this regularly
--
-- the triple of (subject, predicate, object) is the same as for ontology
-- relationships, with the exception of predicate being the greatest common
-- denominator of the relationships types visited in the path (i.e., if
-- relationship type A is-a relationship type B, the greatest common
-- denominator for path containing both types A and B is B)
--
-- See the GO database or Chado schema for other (and possibly better
-- documented) implementations of the transitive closure table approach.
CREATE SEQUENCE term_path_pk_seq;
CREATE TABLE term_path (
term_path_id INTEGER DEFAULT nextval ( 'term_path_pk_seq' ) NOT NULL ,
subject_term_id INTEGER NOT NULL ,
predicate_term_id INTEGER NOT NULL ,
object_term_id INTEGER NOT NULL ,
ontology_id INTEGER NOT NULL ,
distance INTEGER ,
PRIMARY KEY (term_path_id),
UNIQUE ( subject_term_id , predicate_term_id , object_term_id , ontology_id , distance ) ) ;
CREATE INDEX trmpath_predicateid ON term_path ( predicate_term_id );
CREATE INDEX trmpath_objectid ON term_path ( object_term_id );
CREATE INDEX trmpath_ontid ON term_path ( ontology_id );
-- CONFIG: you may want to add this if you can't get the optimizer to
-- use the composite index for the initial keys
--CREATE INDEX trmpath_subjectid ON term_path(subject_term_id);
-- we can be a bioentry without a biosequence, but not visa-versa
-- most things are going to be keyed off bioentry_id
--
-- accession is the stable id, display_id is a potentially volatile,
-- human readable name.
--
-- Version may be unknown, may be undefined, or may not exist for a certain
-- accession or database (namespace). We require it here to avoid RDBMS-
-- dependend enforcement variants (version is in a compound alternative key),
-- and to simplify query construction for UK look-ups. If there is no version
-- the convention is to put 0 (zero) here. Likewise, a record with a version
-- of zero means the version is to be interpreted as NULL.
--
-- not all entries have a taxon, but many do.
--
-- one bioentry only has one taxon! (weirdo chimerias are not handled. tough)
--
-- Name maps to display_id in bioperl. We have a different column name
-- here to avoid confusion with the naming convention for foreign keys.
CREATE SEQUENCE bioentry_pk_seq;
CREATE TABLE bioentry (
bioentry_id INTEGER DEFAULT nextval ( 'bioentry_pk_seq' ) NOT NULL ,
biodatabase_id INTEGER NOT NULL ,
taxon_id INTEGER ,
name VARCHAR ( 40 ) NOT NULL ,
accession VARCHAR ( 128 ) NOT NULL ,
identifier VARCHAR ( 40 ) ,
division VARCHAR ( 6 ) ,
description TEXT ,
version INTEGER NOT NULL ,
PRIMARY KEY ( bioentry_id ) ,
UNIQUE ( accession , biodatabase_id , version ) ,
-- CONFIG: uncomment one (and only one) of the two lines below. The
-- first puts a uniqueness constraint on the identifier column alone;
-- the other one puts a uniqueness constraint on identifier only
-- within a namespace.
-- UNIQUE ( identifier )
UNIQUE ( identifier , biodatabase_id )
) ;
CREATE INDEX bioentry_name ON bioentry ( name );
CREATE INDEX bioentry_db ON bioentry ( biodatabase_id );
CREATE INDEX bioentry_tax ON bioentry ( taxon_id );
--
-- bioentry-bioentry relationships: these are typed
--
CREATE SEQUENCE bioentry_relationship_pk_seq;
CREATE TABLE bioentry_relationship (
bioentry_relationship_id INTEGER DEFAULT nextval ( 'bioentry_relationship_pk_seq' ) NOT NULL ,
object_bioentry_id INTEGER NOT NULL ,
subject_bioentry_id INTEGER NOT NULL ,
term_id INTEGER NOT NULL ,
rank INTEGER ,
PRIMARY KEY ( bioentry_relationship_id ) ,
UNIQUE ( object_bioentry_id , subject_bioentry_id , term_id ) ) ;
CREATE INDEX bioentryrel_trm ON bioentry_relationship ( term_id );
CREATE INDEX bioentryrel_child ON bioentry_relationship (subject_bioentry_id);
-- CONFIG: you may want to add this if you can't get the optimizer to
-- use the composite index for the initial keys
--CREATE INDEX bioentryrel_parent ON bioentry_relationship(object_bioentry_id);
-- for deep (depth > 1) bioentry relationship trees we need a transitive
-- closure table too
CREATE TABLE bioentry_path (
object_bioentry_id INTEGER NOT NULL ,
subject_bioentry_id INTEGER NOT NULL ,
term_id INTEGER NOT NULL ,
distance INTEGER,
UNIQUE ( object_bioentry_id , subject_bioentry_id , term_id , distance ) ) ;
CREATE INDEX bioentrypath_trm ON bioentry_path ( term_id );
CREATE INDEX bioentrypath_child ON bioentry_path ( subject_bioentry_id );
-- you may want to add this for mysql because MySQL often is broken with
-- respect to using the composite index for the initial keys
--CREATE INDEX bioentrypath_parent ON bioentry_path(object_bioentry_id);
-- some bioentries will have a sequence
-- biosequence because sequence is sometimes a reserved word
CREATE TABLE biosequence (
bioentry_id INTEGER NOT NULL ,
version INTEGER ,
length INTEGER ,
alphabet VARCHAR ( 10 ) ,
seq TEXT ,
PRIMARY KEY ( bioentry_id ) ) ;
-- CONFIG: add these only if you want them:
-- ALTER TABLE biosequence ADD COLUMN ( isoelec_pt NUMERIC(4,2) );
-- ALTER TABLE biosequence ADD COLUMN ( mol_wgt DOUBLE PRECISION );
-- ALTER TABLE biosequence ADD COLUMN ( perc_gc DOUBLE PRECISION );
-- database cross-references (e.g., GenBank:AC123456.1)
--
-- Version may be unknown, may be undefined, or may not exist for a certain
-- accession or database (namespace). We require it here to avoid RDBMS-
-- dependend enforcement variants (version is in a compound alternative key),
-- and to simplify query construction for UK look-ups. If there is no version
-- the convention is to put 0 (zero) here. Likewise, a record with a version
-- of zero means the version is to be interpreted as NULL.
--
CREATE SEQUENCE dbxref_pk_seq;
CREATE TABLE dbxref (
dbxref_id INTEGER DEFAULT nextval ( 'dbxref_pk_seq' ) NOT NULL ,
dbname VARCHAR ( 40 ) NOT NULL ,
accession VARCHAR ( 128 ) NOT NULL ,
version INTEGER NOT NULL ,
PRIMARY KEY ( dbxref_id ) ,
UNIQUE ( accession , dbname , version ) ) ;
CREATE INDEX dbxref_db ON dbxref ( dbname );
-- for roundtripping embl/genbank, we need to have the "optional ID"
-- for the dbxref.
--
-- another use of this table could be for storing
-- descriptive text for a dbxref. for example, we may want to
-- know stuff about the interpro accessions we store (without
-- importing all of interpro), so we can attach the text
-- description as a synonym
CREATE TABLE dbxref_qualifier_value (
dbxref_id INTEGER NOT NULL ,
term_id INTEGER NOT NULL ,
rank INTEGER NOT NULL DEFAULT 0 ,
value TEXT ,
PRIMARY KEY ( dbxref_id , term_id , rank ) ) ;
CREATE INDEX dbxrefqual_dbx ON dbxref_qualifier_value ( dbxref_id );
CREATE INDEX dbxrefqual_trm ON dbxref_qualifier_value ( term_id );
-- Direct dblinks. It is tempting to do this
-- from bioentry_id to bioentry_id. But that wont work
-- during updates of one database - we will have to edit
-- this table each time. Better to do the join through accession
-- and db each time. Should be almost as cheap
CREATE TABLE bioentry_dbxref (
bioentry_id INTEGER NOT NULL ,
dbxref_id INTEGER NOT NULL ,
rank INTEGER ,
PRIMARY KEY ( bioentry_id , dbxref_id ) ) ;
CREATE INDEX dblink_dbx ON bioentry_dbxref ( dbxref_id );
-- We can have multiple references per bioentry, but one reference
-- can also be used for the same bioentry.
--
-- No two references can reference the same reference database entry
-- (dbxref_id). This is where the MEDLINE id goes: PUBMED:123456.
CREATE SEQUENCE reference_pk_seq;
CREATE TABLE reference (
reference_id INTEGER DEFAULT nextval ( 'reference_pk_seq' ) NOT NULL ,
dbxref_id INTEGER ,
location TEXT NOT NULL ,
title TEXT ,
authors TEXT ,
crc VARCHAR ( 32 ) ,
PRIMARY KEY ( reference_id ) ,
UNIQUE ( dbxref_id ) ,
UNIQUE ( crc ) ) ;
-- bioentry to reference associations
CREATE TABLE bioentry_reference (
bioentry_id INTEGER NOT NULL ,
reference_id INTEGER NOT NULL ,
start_pos INTEGER ,
end_pos INTEGER ,
rank INTEGER NOT NULL DEFAULT 0 ,
PRIMARY KEY ( bioentry_id , reference_id , rank ) ) ;
CREATE INDEX bioentryref_ref ON bioentry_reference ( reference_id );
-- We can have multiple comments per seqentry, and
-- comments can have embedded '\n' characters
CREATE SEQUENCE comment_pk_seq;
CREATE TABLE comment (
comment_id INTEGER DEFAULT nextval ( 'comment_pk_seq' ) NOT NULL ,
bioentry_id INTEGER NOT NULL ,
comment_text TEXT NOT NULL ,
rank INTEGER NOT NULL DEFAULT 0 ,
PRIMARY KEY ( comment_id ) ,
UNIQUE ( bioentry_id , rank ) ) ;
-- tag/value and ontology term annotation for bioentries goes here
CREATE TABLE bioentry_qualifier_value (
bioentry_id INTEGER NOT NULL ,
term_id INTEGER NOT NULL ,
value TEXT ,
rank INTEGER NOT NULL DEFAULT 0 ,
UNIQUE ( bioentry_id , term_id , rank ) ) ;
CREATE INDEX bioentryqual_trm ON bioentry_qualifier_value ( term_id );
-- feature table. We cleanly handle
-- - simple locations
-- - split locations
-- - split locations on remote sequences
CREATE SEQUENCE seqfeature_pk_seq;
CREATE TABLE seqfeature (
seqfeature_id INTEGER DEFAULT nextval ( 'seqfeature_pk_seq' ) NOT NULL ,
bioentry_id INTEGER NOT NULL ,
type_term_id INTEGER NOT NULL ,
source_term_id INTEGER NOT NULL ,
display_name VARCHAR ( 64 ) ,
rank INTEGER NOT NULL DEFAULT 0 ,
PRIMARY KEY ( seqfeature_id ) ,
UNIQUE ( bioentry_id , type_term_id , source_term_id , rank ) ) ;
CREATE INDEX seqfeature_trm ON seqfeature ( type_term_id );
CREATE INDEX seqfeature_fsrc ON seqfeature ( source_term_id );
-- CONFIG: you may want to add this if you can't get the optimizer to
-- use the composite index for the initial keys
--CREATE INDEX seqfeature_bioentryid ON seqfeature(bioentry_id);
-- seqfeatures can be arranged in containment hierarchies.
-- one can imagine storing other relationships between features,
-- in this case the term_id can be used to type the relationship
CREATE SEQUENCE seqfeature_relationship_pk_seq;
CREATE TABLE seqfeature_relationship (
seqfeature_relationship_id INTEGER DEFAULT nextval ( 'seqfeature_relationship_pk_seq' ) NOT NULL ,
object_seqfeature_id INTEGER NOT NULL ,
subject_seqfeature_id INTEGER NOT NULL ,
term_id INTEGER NOT NULL ,
rank INTEGER ,
PRIMARY KEY ( seqfeature_relationship_id ) ,
UNIQUE ( object_seqfeature_id , subject_seqfeature_id , term_id ) ) ;
CREATE INDEX seqfeaturerel_trm ON seqfeature_relationship ( term_id );
CREATE INDEX seqfeaturerel_child ON seqfeature_relationship ( subject_seqfeature_id );
-- CONFIG: you may want to add this if you can't get the optimizer to
-- use the composite index for the initial keys
--CREATE INDEX seqfeaturerel_parent ON seqfeature_relationship(object_seqfeature_id);
-- for deep (depth > 1) seqfeature relationship trees we need a transitive
-- closure table too
CREATE TABLE seqfeature_path (
object_seqfeature_id INTEGER NOT NULL ,
subject_seqfeature_id INTEGER NOT NULL ,
term_id INTEGER NOT NULL ,
distance INTEGER,
UNIQUE ( object_seqfeature_id , subject_seqfeature_id , term_id , distance ) ) ;
CREATE INDEX seqfeaturepath_trm ON seqfeature_path ( term_id );
CREATE INDEX seqfeaturepath_child ON seqfeature_path ( subject_seqfeature_id );
-- CONFIG: you may want to add this if you can't get the optimizer to
-- use the composite index for the initial keys
--CREATE INDEX seqfeaturerel_parent ON seqfeature_path(object_seqfeature_id);
-- tag/value associations - or ontology annotations
CREATE TABLE seqfeature_qualifier_value (
seqfeature_id INTEGER NOT NULL ,
term_id INTEGER NOT NULL ,
rank INTEGER NOT NULL DEFAULT 0 ,
value TEXT NOT NULL ,
PRIMARY KEY ( seqfeature_id , term_id , rank ) ) ;
CREATE INDEX seqfeaturequal_trm ON seqfeature_qualifier_value ( term_id );
-- DBXrefs for features. This is necessary for genome oriented viewpoints,
-- where you have a few have long sequences (contigs, or chromosomes) with many
-- features on them. In that case the features are the semantic scope for
-- their annotation bundles, not the bioentry they are attached to.
CREATE TABLE seqfeature_dbxref (
seqfeature_id INTEGER NOT NULL ,
dbxref_id INTEGER NOT NULL ,
rank INTEGER ,
PRIMARY KEY ( seqfeature_id , dbxref_id ) ) ;
CREATE INDEX feadblink_dbx ON seqfeature_dbxref ( dbxref_id );
-- basically we model everything as potentially having
-- any number of locations, ie, a split location. SimpleLocations
-- just have one location. We need to have a location id for the qualifier
-- associations of fuzzy locations.
--
-- please do not try to model complex assemblies with this thing. It wont
-- work. Check out the ensembl schema for this.
--
-- we allow nulls for start/end - this is useful for fuzzies as
-- standard range queries will not be included
--
-- for remote locations, the join to make is to DBXref
--
-- the FK to term is a possibility to store the type of the
-- location for determining in one hit whether it's a fuzzy or not
CREATE SEQUENCE location_pk_seq;
CREATE TABLE location (
location_id INTEGER DEFAULT nextval ( 'location_pk_seq' ) NOT NULL ,
seqfeature_id INTEGER NOT NULL ,
dbxref_id INTEGER ,
term_id INTEGER ,
start_pos INTEGER ,
end_pos INTEGER ,
strand INTEGER NOT NULL DEFAULT 0 ,
rank INTEGER NOT NULL DEFAULT 0 ,
PRIMARY KEY ( location_id ) ,
UNIQUE ( seqfeature_id , rank ) ) ;
CREATE INDEX seqfeatureloc_start ON location ( start_pos, end_pos );
CREATE INDEX seqfeatureloc_dbx ON location ( dbxref_id );
CREATE INDEX seqfeatureloc_trm ON location ( term_id );
-- location qualifiers - mainly intended for fuzzies but anything
-- can go in here
-- some controlled vocab terms have slots;
-- fuzzies could be modeled as min_start(5), max_start(5)
--
-- there is no restriction on extending the fuzzy ontology
-- for your own nefarious aims, although the bio* apis will
-- most likely ignore these
CREATE TABLE location_qualifier_value (
location_id INTEGER NOT NULL ,
term_id INTEGER NOT NULL ,
value VARCHAR ( 255 ) NOT NULL ,
int_value INTEGER ,
PRIMARY KEY ( location_id , term_id ) ) ;
CREATE INDEX locationqual_trm ON location_qualifier_value ( term_id );
--
-- Create the foreign key constraints
--
-- ontology term
ALTER TABLE term ADD CONSTRAINT FKont_term
FOREIGN KEY ( ontology_id ) REFERENCES ontology ( ontology_id )
ON DELETE CASCADE ;
-- term synonyms
ALTER TABLE term_synonym ADD CONSTRAINT FKterm_syn
FOREIGN KEY ( term_id ) REFERENCES term ( term_id )
ON DELETE CASCADE ;
-- term_dbxref
ALTER TABLE term_dbxref ADD CONSTRAINT FKdbxref_trmdbxref
FOREIGN KEY ( dbxref_id ) REFERENCES dbxref ( dbxref_id )
ON DELETE CASCADE ;
ALTER TABLE term_dbxref ADD CONSTRAINT FKterm_trmdbxref
FOREIGN KEY ( term_id ) REFERENCES term ( term_id )
ON DELETE CASCADE ;
-- term_relationship
ALTER TABLE term_relationship ADD CONSTRAINT FKtrmsubject_trmrel
FOREIGN KEY ( subject_term_id ) REFERENCES term ( term_id )
ON DELETE CASCADE ;
ALTER TABLE term_relationship ADD CONSTRAINT FKtrmpredicate_trmrel
FOREIGN KEY ( predicate_term_id ) REFERENCES term ( term_id )
ON DELETE CASCADE ;
ALTER TABLE term_relationship ADD CONSTRAINT FKtrmobject_trmrel
FOREIGN KEY ( object_term_id ) REFERENCES term ( term_id )
ON DELETE CASCADE ;
ALTER TABLE term_relationship ADD CONSTRAINT FKontology_trmrel
FOREIGN KEY ( ontology_id ) REFERENCES ontology ( ontology_id )
ON DELETE CASCADE ;
-- term_relationship_term
ALTER TABLE term_relationship_term ADD CONSTRAINT FKtrmrel_trmreltrm
FOREIGN KEY (term_relationship_id) REFERENCES term_relationship(term_relationship_id)
ON DELETE CASCADE ;
ALTER TABLE term_relationship_term ADD CONSTRAINT FKtrm_trmreltrm
FOREIGN KEY (term_id) REFERENCES term(term_id)
ON DELETE CASCADE ;
-- term_path
ALTER TABLE term_path ADD CONSTRAINT FKtrmsubject_trmpath
FOREIGN KEY ( subject_term_id ) REFERENCES term ( term_id )
ON DELETE CASCADE ;
ALTER TABLE term_path ADD CONSTRAINT FKtrmpredicate_trmpath
FOREIGN KEY ( predicate_term_id ) REFERENCES term ( term_id )
ON DELETE CASCADE ;
ALTER TABLE term_path ADD CONSTRAINT FKtrmobject_trmpath
FOREIGN KEY ( object_term_id ) REFERENCES term ( term_id )
ON DELETE CASCADE ;
ALTER TABLE term_path ADD CONSTRAINT FKontology_trmpath
FOREIGN KEY ( ontology_id ) REFERENCES ontology ( ontology_id )
ON DELETE CASCADE ;
-- taxon, taxon_name
-- unfortunately, we can't constrain parent_taxon_id as it is violated
-- occasionally by the downloads available from NCBI
-- ALTER TABLE taxon ADD CONSTRAINT FKtaxon_taxon
-- FOREIGN KEY ( parent_taxon_id ) REFERENCES taxon ( taxon_id )
-- DEFERRABLE;
ALTER TABLE taxon_name ADD CONSTRAINT FKtaxon_taxonname
FOREIGN KEY ( taxon_id ) REFERENCES taxon ( taxon_id )
ON DELETE CASCADE ;
-- bioentry
ALTER TABLE bioentry ADD CONSTRAINT FKtaxon_bioentry
FOREIGN KEY ( taxon_id ) REFERENCES taxon ( taxon_id ) ;
ALTER TABLE bioentry ADD CONSTRAINT FKbiodatabase_bioentry
FOREIGN KEY ( biodatabase_id ) REFERENCES biodatabase ( biodatabase_id ) ;
-- bioentry_relationship
ALTER TABLE bioentry_relationship ADD CONSTRAINT FKterm_bioentryrel
FOREIGN KEY ( term_id ) REFERENCES term ( term_id ) ;
ALTER TABLE bioentry_relationship ADD CONSTRAINT FKparentent_bioentryrel
FOREIGN KEY ( object_bioentry_id ) REFERENCES bioentry ( bioentry_id )
ON DELETE CASCADE ;
ALTER TABLE bioentry_relationship ADD CONSTRAINT FKchildent_bioentryrel
FOREIGN KEY ( subject_bioentry_id ) REFERENCES bioentry ( bioentry_id )
ON DELETE CASCADE ;
-- bioentry_path
ALTER TABLE bioentry_path ADD CONSTRAINT FKterm_bioentrypath
FOREIGN KEY ( term_id ) REFERENCES term ( term_id ) ;
ALTER TABLE bioentry_path ADD CONSTRAINT FKparentent_bioentrypath
FOREIGN KEY ( object_bioentry_id ) REFERENCES bioentry ( bioentry_id )
ON DELETE CASCADE ;
ALTER TABLE bioentry_path ADD CONSTRAINT FKchildent_bioentrypath
FOREIGN KEY ( subject_bioentry_id ) REFERENCES bioentry ( bioentry_id )
ON DELETE CASCADE ;
-- biosequence
ALTER TABLE biosequence ADD CONSTRAINT FKbioentry_bioseq
FOREIGN KEY ( bioentry_id ) REFERENCES bioentry ( bioentry_id )
ON DELETE CASCADE ;
-- comment
ALTER TABLE comment ADD CONSTRAINT FKbioentry_comment
FOREIGN KEY ( bioentry_id ) REFERENCES bioentry ( bioentry_id )
ON DELETE CASCADE ;
-- bioentry_dbxref
ALTER TABLE bioentry_dbxref ADD CONSTRAINT FKbioentry_dblink
FOREIGN KEY ( bioentry_id ) REFERENCES bioentry ( bioentry_id )
ON DELETE CASCADE ;
ALTER TABLE bioentry_dbxref ADD CONSTRAINT FKdbxref_dblink
FOREIGN KEY ( dbxref_id ) REFERENCES dbxref ( dbxref_id )
ON DELETE CASCADE ;
-- dbxref_qualifier_value
ALTER TABLE dbxref_qualifier_value ADD CONSTRAINT FKtrm_dbxrefqual
FOREIGN KEY ( term_id ) REFERENCES term ( term_id ) ;
ALTER TABLE dbxref_qualifier_value ADD CONSTRAINT FKdbxref_dbxrefqual
FOREIGN KEY ( dbxref_id ) REFERENCES dbxref ( dbxref_id )
ON DELETE CASCADE ;
-- bioentry_reference
ALTER TABLE bioentry_reference ADD CONSTRAINT FKbioentry_entryref
FOREIGN KEY ( bioentry_id ) REFERENCES bioentry ( bioentry_id )
ON DELETE CASCADE ;
ALTER TABLE bioentry_reference ADD CONSTRAINT FKreference_entryref
FOREIGN KEY ( reference_id ) REFERENCES reference ( reference_id )
ON DELETE CASCADE ;
-- reference
ALTER TABLE reference ADD CONSTRAINT FKdbxref_reference
FOREIGN KEY ( dbxref_id ) REFERENCES dbxref ( dbxref_id ) ;
-- bioentry_qualifier_value
ALTER TABLE bioentry_qualifier_value ADD CONSTRAINT FKbioentry_entqual
FOREIGN KEY ( bioentry_id ) REFERENCES bioentry ( bioentry_id )
ON DELETE CASCADE ;
ALTER TABLE bioentry_qualifier_value ADD CONSTRAINT FKterm_entqual
FOREIGN KEY ( term_id ) REFERENCES term ( term_id ) ;
-- seqfeature
ALTER TABLE seqfeature ADD CONSTRAINT FKterm_seqfeature
FOREIGN KEY ( type_term_id ) REFERENCES term ( term_id ) ;
ALTER TABLE seqfeature ADD CONSTRAINT FKsourceterm_seqfeature
FOREIGN KEY ( source_term_id ) REFERENCES term ( term_id ) ;
ALTER TABLE seqfeature ADD CONSTRAINT FKbioentry_seqfeature
FOREIGN KEY ( bioentry_id ) REFERENCES bioentry ( bioentry_id )
ON DELETE CASCADE ;
-- seqfeature_relationship
ALTER TABLE seqfeature_relationship ADD CONSTRAINT FKterm_seqfeatrel
FOREIGN KEY ( term_id ) REFERENCES term ( term_id ) ;
ALTER TABLE seqfeature_relationship ADD CONSTRAINT FKparentfeat_seqfeatrel
FOREIGN KEY ( object_seqfeature_id ) REFERENCES seqfeature ( seqfeature_id )
ON DELETE CASCADE ;
ALTER TABLE seqfeature_relationship ADD CONSTRAINT FKchildfeat_seqfeatrel
FOREIGN KEY ( subject_seqfeature_id ) REFERENCES seqfeature ( seqfeature_id )
ON DELETE CASCADE ;
-- seqfeature_path
ALTER TABLE seqfeature_path ADD CONSTRAINT FKterm_seqfeatpath
FOREIGN KEY ( term_id ) REFERENCES term ( term_id ) ;
ALTER TABLE seqfeature_path ADD CONSTRAINT FKparentfeat_seqfeatpath
FOREIGN KEY ( object_seqfeature_id ) REFERENCES seqfeature ( seqfeature_id )
ON DELETE CASCADE ;
ALTER TABLE seqfeature_path ADD CONSTRAINT FKchildfeat_seqfeatpath
FOREIGN KEY ( subject_seqfeature_id ) REFERENCES seqfeature ( seqfeature_id )
ON DELETE CASCADE ;
-- seqfeature_qualifier_value
ALTER TABLE seqfeature_qualifier_value ADD CONSTRAINT FKterm_featqual
FOREIGN KEY ( term_id ) REFERENCES term ( term_id ) ;
ALTER TABLE seqfeature_qualifier_value ADD CONSTRAINT FKseqfeature_featqual
FOREIGN KEY ( seqfeature_id ) REFERENCES seqfeature ( seqfeature_id )
ON DELETE CASCADE ;
-- seqfeature_dbxref
ALTER TABLE seqfeature_dbxref ADD CONSTRAINT FKseqfeature_feadblink
FOREIGN KEY ( seqfeature_id ) REFERENCES seqfeature ( seqfeature_id )
ON DELETE CASCADE ;
ALTER TABLE seqfeature_dbxref ADD CONSTRAINT FKdbxref_feadblink
FOREIGN KEY ( dbxref_id ) REFERENCES dbxref ( dbxref_id )
ON DELETE CASCADE ;
-- location
ALTER TABLE location ADD CONSTRAINT FKseqfeature_location
FOREIGN KEY ( seqfeature_id ) REFERENCES seqfeature ( seqfeature_id )
ON DELETE CASCADE ;
ALTER TABLE location ADD CONSTRAINT FKdbxref_location
FOREIGN KEY ( dbxref_id ) REFERENCES dbxref ( dbxref_id ) ;
ALTER TABLE location ADD CONSTRAINT FKterm_featloc
FOREIGN KEY ( term_id ) REFERENCES term ( term_id ) ;
-- location_qualifier_value
ALTER TABLE location_qualifier_value ADD CONSTRAINT FKfeatloc_locqual
FOREIGN KEY ( location_id ) REFERENCES location ( location_id )
ON DELETE CASCADE ;
ALTER TABLE location_qualifier_value ADD CONSTRAINT FKterm_locqual
FOREIGN KEY ( term_id ) REFERENCES term ( term_id ) ;
--
-- This is to solve a problem arising from how transactions are implemented
-- in Postgres as opposed to, e.g., Oracle and InnoDB (MySQL). In short, the
-- difference is that in the latter RDBMSs' implementation, if a particular
-- statement within a transaction fails, the preceding (and possibly
-- subsequent) statements are still valid. On commit, all succeeded statements
-- are committed. In Postgres, the failure of a statement invalidates all
-- preceding statements within the same transaction as well as all subsequent,
-- if any.
--
-- This leads to a problem if you program SQL insert and update statements
-- such that presence of the record you attempt to insert is indicated by
-- failure of the statement due to a unique key constraint violation. Even
-- if your code is prepared to handle the failure by e.g. looking up the
-- record, in the case of Postgres this approach cannot work unless you
-- commit every single statement.
--
-- The bioperl-db adaptor code uses the aforementioned approach and is
-- currently dependent on the following support code. If you are not going
-- to use bioperl-db to populate the database, you may comment out all
-- rules, as then they might add another look-up to one already done on the
-- code that you use and hence add unnecessary overhead.
--
CREATE RULE rule_bioentry_i1
AS ON INSERT TO bioentry
WHERE (
SELECT bioentry_id FROM bioentry
WHERE identifier = new.identifier
AND biodatabase_id = new.biodatabase_id
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_bioentry_i2
AS ON INSERT TO bioentry
WHERE (
SELECT bioentry_id FROM bioentry
WHERE accession = new.accession
AND biodatabase_id = new.biodatabase_id
AND version = new.version
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_biodatabase_i
AS ON INSERT TO biodatabase
WHERE (
SELECT biodatabase_id FROM biodatabase
WHERE name = new.name
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_bioentry_dbxref_i
AS ON INSERT TO bioentry_dbxref
WHERE (
SELECT dbxref_id FROM bioentry_dbxref
WHERE bioentry_id = new.bioentry_id
AND dbxref_id = new.dbxref_id
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_bioentry_path_i
AS ON INSERT TO bioentry_path
WHERE (
SELECT bioentry_relationship_id FROM bioentry_relationship
WHERE object_bioentry_id = new.object_bioentry_id
AND subject_bioentry_id= new.subject_bioentry_id
AND term_id = new.term_id
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_bioentry_qualifier_value_i
AS ON INSERT TO bioentry_qualifier_value
WHERE (
SELECT bioentry_id FROM bioentry_qualifier_value
WHERE bioentry_id = new.bioentry_id
AND term_id = new.term_id
AND rank = new.rank
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_bioentry_reference_i
AS ON INSERT TO bioentry_reference
WHERE (
SELECT bioentry_id FROM bioentry_reference
WHERE bioentry_id = new.bioentry_id
AND reference_id = new.reference_id
AND rank = new.rank
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_bioentry_relationship_i
AS ON INSERT TO bioentry_relationship
WHERE (
SELECT bioentry_relationship_id FROM bioentry_relationship
WHERE object_bioentry_id = new.object_bioentry_id
AND subject_bioentry_id= new.subject_bioentry_id
AND term_id = new.term_id
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_biosequence_i
AS ON INSERT TO biosequence
WHERE (
SELECT bioentry_id FROM biosequence
WHERE bioentry_id = new.bioentry_id
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_comment_i
AS ON INSERT TO comment
WHERE (
SELECT comment_id FROM comment
WHERE bioentry_id = new.bioentry_id
AND rank = new.rank
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_dbxref_i
AS ON INSERT TO dbxref
WHERE (
SELECT dbxref_id FROM dbxref
WHERE accession = new.accession
AND dbname = new.dbname
AND version = new.version
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_dbxref_qualifier_value_i
AS ON INSERT TO dbxref_qualifier_value
WHERE (
SELECT dbxref_id FROM dbxref_qualifier_value
WHERE dbxref_id = new.dbxref_id
AND term_id = new.term_id
AND rank = new.rank
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_location_i
AS ON INSERT TO location
WHERE (
SELECT location_id FROM location
WHERE seqfeature_id = new.seqfeature_id
AND rank = new.rank
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_location_qualifier_value_i
AS ON INSERT TO location_qualifier_value
WHERE (
SELECT location_id FROM location_qualifier_value
WHERE location_id = new.location_id
AND term_id = new.term_id
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_ontology_i
AS ON INSERT TO ontology
WHERE (
SELECT ontology_id FROM ontology
WHERE name = new.name
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_reference_i1
AS ON INSERT TO reference
WHERE (
SELECT reference_id FROM reference
WHERE crc = new.crc
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_reference_i2
AS ON INSERT TO reference
WHERE (
SELECT reference_id FROM reference
WHERE dbxref_id = new.dbxref_id
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_seqfeature_i
AS ON INSERT TO seqfeature
WHERE (
SELECT seqfeature_id FROM seqfeature
WHERE bioentry_id = new.bioentry_id
AND type_term_id = new.type_term_id
AND source_term_id = new.source_term_id
AND rank = new.rank
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_seqfeature_dbxref_i
AS ON INSERT TO seqfeature_dbxref
WHERE (
SELECT seqfeature_id FROM seqfeature_dbxref
WHERE seqfeature_id = new.seqfeature_id
AND dbxref_id = new.dbxref_id
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_seqfeature_path_i
AS ON INSERT TO seqfeature_path
WHERE (
SELECT subject_seqfeature_id FROM seqfeature_path
WHERE object_seqfeature_id = new.object_seqfeature_id
AND subject_seqfeature_id= new.subject_seqfeature_id
AND term_id = new.term_id
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_seqfeature_qualifier_value_i
AS ON INSERT TO seqfeature_qualifier_value
WHERE (
SELECT seqfeature_id FROM seqfeature_qualifier_value
WHERE seqfeature_id = new.seqfeature_id
AND term_id = new.term_id
AND rank = new.rank
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_seqfeature_relationship_i
AS ON INSERT TO seqfeature_relationship
WHERE (
SELECT subject_seqfeature_id FROM seqfeature_relationship
WHERE object_seqfeature_id = new.object_seqfeature_id
AND subject_seqfeature_id= new.subject_seqfeature_id
AND term_id = new.term_id
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_taxon_i
AS ON INSERT TO taxon
WHERE (
SELECT taxon_id FROM taxon
WHERE ncbi_taxon_id = new.ncbi_taxon_id
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_taxon_name_i
AS ON INSERT TO taxon_name
WHERE (
SELECT taxon_id FROM taxon_name
WHERE taxon_id = new.taxon_id
AND name = new.name
AND name_class = new.name_class
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_term_i1
AS ON INSERT TO term
WHERE (
SELECT term_id FROM term
WHERE identifier = new.identifier
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_term_i2
AS ON INSERT TO term
WHERE (
SELECT term_id FROM term
WHERE name = new.name
AND ontology_id = new.ontology_id
AND is_obsolete = new.is_obsolete
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_term_dbxref_i
AS ON INSERT TO term_dbxref
WHERE (
SELECT dbxref_id FROM term_dbxref
WHERE dbxref_id = new.dbxref_id
AND term_id = new.term_id
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_term_path_i
AS ON INSERT TO term_path
WHERE (
SELECT subject_term_id FROM term_path
WHERE subject_term_id = new.subject_term_id
AND predicate_term_id = new.predicate_term_id
AND object_term_id = new.object_term_id
AND ontology_id = new.ontology_id
AND distance = new.distance
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_term_relationship_i
AS ON INSERT TO term_relationship
WHERE (
SELECT term_relationship_id FROM term_relationship
WHERE subject_term_id = new.subject_term_id
AND predicate_term_id = new.predicate_term_id
AND object_term_id = new.object_term_id
AND ontology_id = new.ontology_id
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_term_relationship_term_i1
AS ON INSERT TO term_relationship_term
WHERE (
SELECT term_relationship_id FROM term_relationship_term
WHERE term_relationship_id = new.term_relationship_id
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_term_relationship_term_i2
AS ON INSERT TO term_relationship_term
WHERE (
SELECT term_id FROM term_relationship_term
WHERE term_id = new.term_id
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_term_synonym_i
AS ON INSERT TO term_synonym
WHERE (
SELECT term_id FROM term_synonym
WHERE synonym = new.synonym
AND term_id = new.term_id
)
IS NOT NULL
DO INSTEAD NOTHING
;
--
-- Functions that may be used as an API by applications, e.g. load scripts etc.
--
-- this is used by load_ncbi_taxonomy.pl to speed up loading into the taxon
-- table by 1 to 2 orders of magnitude
CREATE OR REPLACE FUNCTION unconstrain_taxon ()
RETURNS INTEGER
AS
'
DROP RULE rule_taxon_i ON taxon;
SELECT 1;
'
LANGUAGE SQL
VOLATILE STRICT SECURITY DEFINER
;
-- this function re-establishes what unconstrain_taxon() removed temporarily
CREATE OR REPLACE FUNCTION constrain_taxon ()
RETURNS INTEGER
AS
'
CREATE RULE rule_taxon_i
AS ON INSERT TO taxon
WHERE (
SELECT taxon_id FROM taxon
WHERE ncbi_taxon_id = new.ncbi_taxon_id
)
IS NOT NULL
DO INSTEAD NOTHING
;
SELECT 1;
'
LANGUAGE SQL
VOLATILE STRICT SECURITY DEFINER
;