Contents

1 Introduction

MSEADbi makes it easy to generate your own annotation package for MSEA. The package design is the same as MeSHDbi or LRBaseDbi, and the usage is also the same.

This document describes how to use the MSEADbi and MSEA.XXX.pb.db packages. MSEA.XXX.pb.db packages provide the metabolite sets, grouped by pathway, for species XXX in the PathBank database. These packages are generated by MSEADbi, whose sole role is the construction of MSEA.XXX.pb.db packages: its makeMSEAPackage function lets users generate their own MSEA.XXX.pb.db packages.

2 makeMSEAPackage

Here we use the makeMSEAPackage function to create an MSEA.XXX.pb.db package. The user only has to supply two things: (1) a CSV file filtered to contain only the lines for a single species from the PathBank data file (the "Metabolite names linked to PathBank primary pathways" CSV), and (2) a metadata table describing the PathBank data. Here we use the Arabidopsis thaliana demo data taken from the PathBank primary pathways CSV.

library('MSEADbi')
## Warning: Package 'MSEADbi' is deprecated and will be removed from Bioconductor
##   version 3.14. Please see replacement package AHPathbankDbs
# Scratch directory that will receive the generated package sources.
tmp <- tempdir()

# Locate the PathBank data and metadata CSV files shipped with MSEADbi.
ath <- system.file("extdata", "MSEA.Ath.pb.db_DATA.csv", package = "MSEADbi")
meta <- system.file("extdata", "MSEA.Ath.pb.db_METADATA.csv",
                    package = "MSEADbi")
athDf <- read.csv(ath, fileEncoding = "utf8")
metaDf <- read.csv(meta)

# Strip every "." from the column names so the names can be used in queries.
names(athDf) <- gsub(".", "", names(athDf), fixed = TRUE)
names(metaDf) <- gsub(".", "", names(metaDf), fixed = TRUE)

# Generate the MSEA.Ath.pb.db annotation package sources under `tmp`.
makeMSEAPackage(
  pkgname = "MSEA.Ath.pb.db",
  data = athDf,
  metadata = metaDf,
  organism = "Arabidopsis thaliana",
  version = "0.99.0",
  maintainer = "Kozo Nishida <[email protected]>",
  author = "Kozo Nishida",
  destDir = tmp,
  license = "Artistic-2.0"
)
## Creating package in /tmp/RtmpETO3HX/MSEA.Ath.pb.db
# Install the generated package from its source directory.
# file.path() builds the path instead of pasting the separator by hand.
mseaPackageDir <- file.path(tmp, "MSEA.Ath.pb.db")
install.packages(mseaPackageDir, repos = NULL, type = "source")
## Installing package into '/tmp/RtmpwXpl9Z/Rinst980bd49e57841'
## (as 'lib' is unspecified)

3 columns, keytypes, and select

Every MSEA.XXX.pb.db package exports an object with the same name as the package, which is an instance of the MSEADb class. Many data access functions are implemented for this object. For example, columns returns the columns that can be retrieved from an MSEA.XXX.pb.db package. keytypes returns the columns that can be used as the keytype parameter of the keys and select functions. select returns the rows, restricted to the requested columns, that match the user-specified keys; the result is returned as a data frame.

library(AnnotationDbi)
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     Filter, Find, Map, Position, Reduce, anyDuplicated, append,
##     as.data.frame, basename, cbind, colnames, dirname, do.call,
##     duplicated, eval, evalq, get, grep, grepl, intersect, is.unsorted,
##     lapply, mapply, match, mget, order, paste, pmax, pmax.int, pmin,
##     pmin.int, rank, rbind, rownames, sapply, setdiff, sort, table,
##     tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
## 
##     I, expand.grid, unname
library(MSEA.Ath.pb.db)

columns(MSEA.Ath.pb.db)
##  [1] "CAS"            "ChEBIID"        "DrugBankID"     "Formula"       
##  [5] "HMDBID"         "IUPAC"          "InChI"          "InChIKey"      
##  [9] "KEGGID"         "MetaboliteID"   "MetaboliteName" "PathBankID"    
## [13] "PathwayName"    "PathwaySubject" "SMILES"
keytypes(MSEA.Ath.pb.db)
##  [1] "CAS"            "ChEBIID"        "DrugBankID"     "Formula"       
##  [5] "HMDBID"         "IUPAC"          "InChI"          "InChIKey"      
##  [9] "KEGGID"         "MetaboliteID"   "MetaboliteName" "PathBankID"    
## [13] "PathwayName"    "PathwaySubject" "SMILES"
# Look up selected identifier columns for two PathBank pathway IDs.
ids <- c('SMP0012018', 'SMP0012019')
select(
  MSEA.Ath.pb.db,
  keys = ids,
  columns = c("MetaboliteID", "CAS", "HMDBID", "ChEBIID", "KEGGID"),
  keytype = "PathBankID"
)
##    MetaboliteID        CAS      HMDBID ChEBIID  KEGGID
## 1    PW_C000437  7235-40-7 HMDB0000561   17579  C02094
## 2    PW_C057881 13312-52-2 HMDB0112264   67188  C20484
## 3    PW_C009794  7439-89-6 HMDB0015531   18248  C00023
## 4    PW_C001065  7782-44-7 HMDB0001377   15379  C00007
## 5    PW_C057882                          67192  C20692
## 6    PW_C017456    79-77-6 HMDB0036565   32325  C12287
## 7    PW_C000544 15438-31-0 HMDB0000692   29033  C14818
## 8    PW_C057883                          67190  C20693
## 9    PW_C057884                          67191  C20694
## 10   PW_C057885                             NA        
## 11   PW_C040034            HMDB0059597   15378  C00080
## 12   PW_C001799 14875-96-8 HMDB0003178   17627  C00032
## 13   PW_C057879                          81466  C18037
## 14   PW_C001778   126-29-4 HMDB0003101      NA  C08614
## 15   PW_C001757 30743-41-0 HMDB0003020   25501  C08606
## 16   PW_C057886 14660-91-4               35306  C13431
## 18   PW_C057887                          32304  C13453
## 19   PW_C057888                          34596  C14044
## 21   PW_C000721    53-84-9 HMDB0000902   15846  C00003
## 22   PW_C001144    58-68-4 HMDB0001487   16908  C00004
## 24   PW_C057889                          31157 C13455 
## 25   PW_C057878                          71302        
## 26   PW_C001420  7732-18-5 HMDB0002111   15377  C00001
## 27   PW_C016115 21293-29-8 HMDB0035140   18743  C06082
## 28   PW_C001783  7722-84-1 HMDB0003125   16240  C00027
# Retrieve every available column for the first few keys of one key type.
cls <- columns(MSEA.Ath.pb.db)
kts <- keytypes(MSEA.Ath.pb.db)

# Use the second key type both to list the keys and to run the query.
kt <- kts[2]
ks <- head(keys(MSEA.Ath.pb.db, keytype = kt))

res <- select(MSEA.Ath.pb.db, keys = ks, columns = cls, keytype = kt)
head(res)
##          CAS ChEBIID DrugBankID Formula      HMDBID
## 1  7235-40-7   17579             C40H56 HMDB0000561
## 2 13312-52-2   67188             C40H56 HMDB0112264
## 3  7439-89-6   18248    DB01592      Fe HMDB0015531
## 4  7782-44-7   15379                 O2 HMDB0001377
## 5              67192            C27H36O            
## 6    79-77-6   32325            C13H20O HMDB0036565
##                                                                                                                                                                       IUPAC
## 1 1,3,3-trimethyl-2-[(1E,3E,5E,7E,9Z,11Z,13E,15E,17E)-3,7,12,16-tetramethyl-18-(2,6,6-trimethylcyclohex-1-en-1-yl)octadeca-1,3,5,7,9,11,13,15,17-nonaen-1-yl]cyclohex-1-ene
## 2 1,3,3-trimethyl-2-[(1E,3E,5E,7E,9E,11E,13E,15Z,17E)-3,7,12,16-tetramethyl-18-(2,6,6-trimethylcyclohex-1-en-1-yl)octadeca-1,3,5,7,9,11,13,15,17-nonaen-1-yl]cyclohex-1-ene
## 3                                                                                                                                                      lambda2-iron(2+) ion
## 4                                                                                                                                                                 oxidanone
## 5                                                     (2E,4E,6E,8E,10E,12Z,14E)-4,9,13-trimethyl-15-(2,6,6-trimethylcyclohex-1-en-1-yl)pentadeca-2,4,6,8,10,12,14-heptaenal
## 6                                                                                                                  (3E)-4-(2,6,6-trimethylcyclohex-1-en-1-yl)but-3-en-2-one
##                                                                                                                                                                                                                                                  InChI
## 1 InChI=1S/C40H56/c1-31(19-13-21-33(3)25-27-37-35(5)23-15-29-39(37,7)8)17-11-12-18-32(2)20-14-22-34(4)26-28-38-36(6)24-16-30-40(38,9)10/h11-14,17-22,25-28H,15-16,23-24,29-30H2,1-10H3/b12-11+,19-13+,20-14+,27-25+,28-26+,31-17+,32-18+,33-21+,34-22+
## 2 InChI=1S/C40H56/c1-31(19-13-21-33(3)25-27-37-35(5)23-15-29-39(37,7)8)17-11-12-18-32(2)20-14-22-34(4)26-28-38-36(6)24-16-30-40(38,9)10/h11-14,17-22,25-28H,15-16,23-24,29-30H2,1-10H3/b12-11+,19-13+,20-14+,27-25+,28-26+,31-17+,32-18+,33-21-,34-22+
## 3                                                                                                                                                                                                                                      InChI=1S/Fe/q+2
## 4                                                                                                                                                                                                                                     InChI=1S/O2/c1-2
## 5                                                                 InChI=1S/C27H36O/c1-22(12-7-8-13-23(2)16-11-21-28)14-9-15-24(3)18-19-26-25(4)17-10-20-27(26,5)6/h7-9,11-16,18-19,21H,10,17,20H2,1-6H3/b8-7+,14-9+,16-11+,19-18+,22-12+,23-13+,24-15-
## 6                                                                                                                                                                      InChI=1S/C13H20O/c1-10-6-5-9-13(3,4)12(10)8-7-11(2)14/h7-8H,5-6,9H2,1-4H3/b8-7+
##                      InChIKey KEGGID MetaboliteID            MetaboliteName
## 1 OENHQHLEOONYIE-JLTXGRSLSA-N C02094   PW_C000437                B-Carotene
## 2 OENHQHLEOONYIE-BVZAMQQESA-N C20484   PW_C057881          9-cis-β-Carotene
## 3 CWYNVVGOOAEACU-UHFFFAOYSA-N C00023   PW_C009794                      Iron
## 4 MYMOFIZGZYHOMD-UHFFFAOYSA-N C00007   PW_C001065                    Oxygen
## 5 PJEHRCCPERVGEC-OLTPUEGQSA-N C20692   PW_C057882 9-cis-10'-apo-β-carotenal
## 6 PSQYTAPXSHCGMF-BQYQJAHWSA-N C12287   PW_C017456                  β-Ionone
##   PathBankID                 PathwayName PathwaySubject
## 1 SMP0012018 5-Deoxystrigol Biosynthesis      Metabolic
## 2 SMP0012018 5-Deoxystrigol Biosynthesis      Metabolic
## 3 SMP0012018 5-Deoxystrigol Biosynthesis      Metabolic
## 4 SMP0012018 5-Deoxystrigol Biosynthesis      Metabolic
## 5 SMP0012018 5-Deoxystrigol Biosynthesis      Metabolic
## 6 SMP0012018 5-Deoxystrigol Biosynthesis      Metabolic
##                                                                                          SMILES
## 1  C\\C(\\C=C\\C=C(/C)\\C=C\\C1=C(C)CCCC1(C)C)=C/C=C/C=C(\\C)/C=C/C=C(\\C)/C=C/C1=C(C)CCCC1(C)C
## 2 C/C(/C=C/C=C(/C)\\C=C\\C1=C(C)CCCC1(C)C)=C\\C=C\\C=C(/C)\\C=C\\C=C(/C)\\C=C\\C1=C(C)CCCC1(C)C
## 3                                                                                        [Fe++]
## 4                                                                                           O=O
## 5                     [H]C(=O)\\C=C\\C(\\C)=C\\C=C\\C=C(/C)\\C=C\\C=C(\\C)/C=C/C1=C(C)CCCC1(C)C
## 6                                                                 CC(=O)\\C=C\\C1=C(C)CCCC1(C)C

4 Other functions

Additional functions such as species, dbInfo, dbfile, dbschema, and dbconn are also available. For each MSEA.XXX.pb.db package, species returns the common name of the organism. dbInfo returns the package metadata. dbfile returns the path of the SQLite file. dbschema returns the schema of the database. dbconn returns the connection to the SQLite database.

species(MSEA.Ath.pb.db)
## [1] "Thale cress"
dbInfo(MSEA.Ath.pb.db)
##              NAME
## 1     SOURCENAME1
## 2      SOURCEURL1
## 3        DBSCHEMA
## 4 DBSCHEMAVERSION
## 5        ORGANISM
## 6         SPECIES
## 7         package
## 8         Db type
## 9     MSEAVERSION
##                                                                VALUE
## 1                                                           PathBank
## 2 http://pathbank.org/downloads/pathbank_primary_metabolites.csv.zip
## 3                                                     MSEA.Ath.pb.db
## 4                                                              1.0.0
## 5                                               Arabidopsis thaliana
## 6                                                        Thale cress
## 7                                                      AnnotationDbi
## 8                                                             MSEADb
## 9                                                               2020
dbfile(MSEA.Ath.pb.db)
## [1] "/tmp/RtmpwXpl9Z/Rinst980bd49e57841/MSEA.Ath.pb.db/extdata/MSEA.Ath.pb.db.sqlite"
dbschema(MSEA.Ath.pb.db)
## [1] "CREATE TABLE `METADATA` (\n  `NAME` TEXT,\n  `VALUE` TEXT\n)"                                                                                                                                                                                                                                                                           
## [2] "CREATE TABLE `DATA` (\n  `PathBankID` TEXT,\n  `PathwayName` TEXT,\n  `PathwaySubject` TEXT,\n  `MetaboliteID` TEXT,\n  `MetaboliteName` TEXT,\n  `HMDBID` TEXT,\n  `KEGGID` TEXT,\n  `ChEBIID` REAL,\n  `DrugBankID` TEXT,\n  `CAS` TEXT,\n  `Formula` TEXT,\n  `IUPAC` TEXT,\n  `SMILES` TEXT,\n  `InChI` TEXT,\n  `InChIKey` TEXT\n)"
dbconn(MSEA.Ath.pb.db)
## <SQLiteConnection>
##   Path: /tmp/RtmpwXpl9Z/Rinst980bd49e57841/MSEA.Ath.pb.db/extdata/MSEA.Ath.pb.db.sqlite
##   Extensions: TRUE

5 References

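Wishart DS, Li C, Marcu A, et al. (2020). PathBank: a comprehensive pathway database for model organisms. Nucleic Acids Research, 48(D1), D470–D478. doi:10.1093/nar/gkz861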

Session information

Here is the output of sessionInfo() on the system on which this document was compiled:

## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /home/biocbuild/bbs-3.13-bioc/R/lib/libRblas.so
## LAPACK: /home/biocbuild/bbs-3.13-bioc/R/lib/libRlapack.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB              LC_COLLATE=C              
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
## [1] MSEA.Ath.pb.db_0.99.0 AnnotationDbi_1.54.0  IRanges_2.26.0       
## [4] S4Vectors_0.30.0      Biobase_2.52.0        BiocGenerics_0.38.0  
## [7] MSEADbi_1.2.0         BiocStyle_2.20.0     
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.6             XVector_0.32.0         GenomeInfoDb_1.28.0   
##  [4] bslib_0.2.5.1          compiler_4.1.0         BiocManager_1.30.15   
##  [7] jquerylib_0.1.4        zlibbioc_1.38.0        bitops_1.0-7          
## [10] tools_4.1.0            digest_0.6.27          bit_4.0.4             
## [13] jsonlite_1.7.2         RSQLite_2.2.7          evaluate_0.14         
## [16] memoise_2.0.0          pkgconfig_2.0.3        png_0.1-7             
## [19] rlang_0.4.11           DBI_1.1.1              yaml_2.2.1            
## [22] xfun_0.23              fastmap_1.1.0          GenomeInfoDbData_1.2.6
## [25] httr_1.4.2             stringr_1.4.0          knitr_1.33            
## [28] Biostrings_2.60.0      sass_0.4.0             vctrs_0.3.8           
## [31] bit64_4.0.5            R6_2.5.0               rmarkdown_2.8         
## [34] bookdown_0.22          blob_1.2.1             magrittr_2.0.1        
## [37] htmltools_0.5.1.1      KEGGREST_1.32.0        stringi_1.6.2         
## [40] RCurl_1.98-1.3         cachem_1.0.5           crayon_1.4.1