In this demo, we illustrate how to examine the modular structure of a network by merging both pathway crosstalks (see the demos neuropsychiatric disorders and inflammatory disorders), identifying functional modules with therapeutic potentials (enriched for approved drug targets) and evolutionary origins (co-arising of inflammatory disorder-specific modules with the speciation of bony fish).
# Please install R (version 4.3.2 or above)
# see https://cran.r-project.org
# if the package 'BiocManager' not installed, please do so
if(!("BiocManager" %in% rownames(installed.packages()))) install.packages("BiocManager")
# first, install basic packages: remotes, tidyverse, pbapply, patchwork
BiocManager::install(c('remotes','tidyverse','pbapply','patchwork'), dependencies=T)
# then, install the package 'PIPE' (now hosted at github)
BiocManager::install("hfang-bristol/PIPE", dependencies=T, force=T)
# check the package 'PIPE' successfully installed
library(PIPE)
# load packages
library(PIPE)
library(tidyverse)
library(patchwork)
# specify built-in data placeholder
placeholder <- "http://www.comptransmed.pro/bigdata_pipe"
# create the output folder 'pipe_showcase'
outdir <- 'pipe_showcase'
if(!dir.exists(outdir)) dir.create(outdir)
Load results for neuropsychiatric disorders (NPD) generated in the demo neuropsychiatric disorders, including target gene prioritisation (dTarget_NPD.RData
) and pathway crosstalk (subg_NPD.RData
).
Load results for inflammatory disorders (IND) generated in the demo inflammatory disorders, including target gene prioritisation (dTarget_IND.RData
) and pathway crosstalk (subg_IND.RData
).
# load results for neuropsychiatric disorders (NPD)
## target gene prioritisation
dTarget_NPD <- load(file.path(outdir,'dTarget_NPD.RData')) %>% get()
## pathway crosstalk
subg_NPD <- load(file.path(outdir,'subg_NPD.RData')) %>% get()
# load results for inflammatory disorders (IND)
## target gene prioritisation
dTarget_IND <- load(file.path(outdir,'dTarget_IND.RData')) %>% get()
## pathway crosstalk
subg_IND <- load(file.path(outdir,'subg_IND.RData')) %>% get()
# merge both pathway crosstalks
subg_both <- list(subg_IND,subg_NPD) %>% oCombineNet() %>% delete_vertex_attr("priority") %>% oLayout("graphlayouts.layout_with_stress")
# identify modules
set.seed(825)
V(subg_both)$modules <- subg_both %>% cluster_spinglass(spins=7) %>% membership()
table(V(subg_both)$modules)
# reorder by the module size (optional)
df <- V(subg_both)$modules %>% enframe()
df1 <- df %>% count(value) %>% mutate(new=rank(n,ties.method="first"))
df <- df %>% inner_join(df1, by='value') %>% transmute(name, value=new)
V(subg_both)$modules <- deframe(df)
table(V(subg_both)$modules)
# visualise modules
## add coordinates (within and between modules)
subg_both <- subg_both %>% oAddCoords("modules", glayout=layout_with_kk, edge.color.alternative=c("grey70","grey95"))
## do visualisation
gg <- subg_both %>% oGGnetwork(node.xcoord="xcoord", node.ycoord="ycoord", node.color="modules", colormap="sci_locuszoom", node.color.alpha=0.5, node.size.range=3, edge.color="color", edge.arrow.gap=0)
## also add gene label
gg_label <- gg + ggrepel::geom_text_repel(data=gg$data %>% distinct(x,y,name), aes(x,y,label=name), color='black', force=2, size=1.8, alpha=0.7, show.legend=F, segment.alpha=0.5, segment.color="grey70", segment.size=0.2, seed=825, max.overlaps=Inf)
# save into a file 'MOD_module.pdf' under the folder 'pipe_showcase'
ggsave(file.path(outdir,'MOD_module.pdf'), gg, width=6, height=6)
# save into a file 'MOD_module2.pdf' under the folder 'pipe_showcase'
ggsave(file.path(outdir,'MOD_module2.pdf'), gg_label, width=6, height=6)
# write into a file 'MOD_module.txt' under the folder 'pipe_showcase'
ig <- subg_both
ind <- match(V(ig)$name, dTarget_NPD$priority$name)
V(ig)$NPD_priority <- dTarget_NPD$priority$rating[ind]
ind <- match(V(ig)$name, dTarget_IND$priority$name)
V(ig)$IND_priority <- dTarget_IND$priority$rating[ind]
oIG2TB(ig,'nodes') %>% transmute(name, modules=str_c('M',modules), NPD_priority, IND_priority, description) %>% arrange(modules,name) %>% write_delim(file.path(outdir,'MOD_module.txt'), delim='\t')
Figure 5.1: Modular visualisation of a network merging two pathway crosstalks. Modules are color-coded as indicated, with member genes also color-coded and labelled.
The module information stored in the output file MOD_module.txt
above can be explored below.
# extract genes per module
df_modules <- subg_both %>% igraph::as_data_frame("vertices") %>% as_tibble() %>% transmute(name, modules=str_c('M',modules)) %>% arrange(modules)
# reveal functional modules
## ls_modules
ls_modules <- df_modules %>% select(name,modules) %>% nest(data=-modules) %>% mutate(gene=map(data,~pull(.x,name))) %>% select(-data) %>% deframe()
## background
background <- df_modules %>% pull(name)
## sets
sets <- tibble(onto='KEGG') %>% mutate(set=map(onto,~oRDS(str_c("org.Hs.eg",.x),placeholder=placeholder)))
## esad
esad <- oSEAadv(ls_modules, sets, background=background, size.range=c(10,500), min.overlap=5)
# balloon plot
## top two enrichments per module
gp <- esad %>% oSEAextract() %>% filter(adjp<1e-2, namespace %in% 'Environmental Process') %>% group_by(group) %>% top_n(2,-adjp) %>% oSEAballoon(top=5, adjp.cutoff=0.05, zlim=c(0,15), slim=c(0,20), size.range=c(1,6), shape=19, colormap="grey90-orange-darkred")
gp_kegg <- gp + theme(axis.text.y=element_text(size=6),axis.text.x=element_text(size=6,angle=0),strip.text.y=element_text(size=6,angle=0))
# save into a file 'MOD_module_kegg.pdf' under the folder 'pipe_showcase'
ggsave(file.path(outdir,'MOD_module_kegg.pdf'), gp_kegg, width=5, height=2)
# write into a file 'MOD_module_kegg.txt' under the folder 'pipe_showcase'
esad %>% oSEAextract() %>% filter(adjp<1e-2, namespace %in% 'Environmental Process') %>% group_by(group) %>% top_n(2,-adjp) %>% arrange(group,adjp) %>% write_delim(file.path(outdir,'MOD_module_kegg.txt'), delim='\t')
Figure 6.1: KEGG enrichment analysis for genes within each of five modules (M1 – M5).
Enriched pathways stored in the output file MOD_module_kegg.txt
above can be explored below.
# neuropsychiatric disorder: gp_rating_npd
ig <- subg_both
ind <- match(V(ig)$name, dTarget_NPD$priority$name)
V(ig)$priority <- dTarget_NPD$priority$rating[ind]
gp_rating_npd <- oGGnetwork(ig, node.label='label', node.label.size=1, node.label.color='black', node.label.alpha=0.8, node.label.padding=0.1, node.label.arrow=0, node.label.force=0.01, node.shape=19, node.xcoord='xcoord', node.ycoord='ycoord', node.color='priority', node.color.title='Credit\nscore', colormap='spectral', zlim=c(0,10), node.color.alpha=0.9, node.size.range=3, edge.color="color",edge.color.alpha=0.5,edge.curve=0,edge.arrow.gap=0)
gp_rating_npd <- gp_rating_npd + ggtitle('Neuropsychiatric disorders')
# inflammatory disorder: gp_rating_ind
ig <- subg_both
ind <- match(V(ig)$name, dTarget_IND$priority$name)
V(ig)$priority <- dTarget_IND$priority$rating[ind]
gp_rating_ind <- oGGnetwork(ig, node.label='label', node.label.size=1, node.label.color='black', node.label.alpha=0.8, node.label.padding=0.1, node.label.arrow=0, node.label.force=0.01, node.shape=19, node.xcoord='xcoord', node.ycoord='ycoord', node.color='priority', node.color.title='Credit\nscore', colormap='spectral', zlim=c(0,10), node.color.alpha=0.9, node.size.range=3, edge.color="color",edge.color.alpha=0.5,edge.curve=0,edge.arrow.gap=0)
gp_rating_ind <- gp_rating_ind + ggtitle('Inflammatory disorders')
# gp_both
gp_both <- list(gp_rating_npd,gp_rating_ind) %>% wrap_plots(ncol=2, guides='collect')
# save into a file 'MOD_module_rating.pdf' under the folder 'pipe_showcase'
ggsave(file.path(outdir,'MOD_module_rating.pdf'), gp_both, width=13, height=5)
Figure 7.1: Disorder-specific illustration of functional modules. The same network layout is shown above but nodes are color-coded according to credit scores in neuropsychiatric disorders (left panel) and in inflammatory disorders (right panel).
# disorder-specific modules
df_group <- df_modules %>% mutate(group=ifelse(modules %in% c('M1','M2','M4'), '1 - neuropsychiatric', '2 - inflammatory')) %>% select(name,group)
ls_group <- df_group %>% nest(data=-group) %>% mutate(gene=map(data,~pull(.x,name))) %>% select(-data) %>% deframe()
# per target, keep the maximum phased drugs only (along with disease indications)
ChEMBL_slim <- oRDS("ChEMBL_slim", placeholder=placeholder)
# ontology annotation: two categories 'approved' and 'phased' (phases 2 and 3)
set <- ChEMBL_slim %>% filter(phase>=2) %>% select(Symbol,phase) %>% distinct() %>% mutate(phase=ifelse(phase==4,'approved','phased')) %>% nest(data=-phase) %>% mutate(data=map(data,~pull(.x))) %>% deframe()
# enrichment analysis
background <- union(dTarget_NPD$priority %>% pull(name), dTarget_IND$priority %>% pull(name))
ls_res <- lapply(1:length(ls_group), function(i){
eset <- oSEA(ls_group[[i]], set, background=background, size.range=c(10,20000), min.overlap=3)
eset %>% oSEAextract() %>% mutate(group=names(ls_group)[i])
})
df_res <- do.call(rbind, ls_res)
# forest plot
df_res <- df_res %>% filter(name %in% c('approved','phased')) %>% mutate(adjp=p.adjust(pvalue,method="BH"))
gp_forest <- df_res %>% oSEAforest(adjp.cutoff=1, zlim=c(0,6), colormap='grey50-orange-darkred', legend.direction="horizontal", sortBy="or")
# save into a file 'MOD_repurpose_forest.pdf' under the folder 'pipe_showcase'
ggsave(file.path(outdir,'MOD_repurpose_forest.pdf'), gp_forest, width=5, height=1.5)
# write into a file 'MOD_repurpose_forest.txt' under the folder 'pipe_showcase'
df_res %>% select(group,name,zscore,adjp,or,CIl,CIu,nO,overlap) %>% arrange(group,name,adjp) %>% write_delim(file.path(outdir,'MOD_repurpose_forest.txt'), delim='\t')
Figure 8.1: Forest plot of approved or phased drug target enrichments.
Enriched approved or phased drug target categories stored in the output file MOD_repurpose_forest.txt
above can be explored below.
Drug repurposing dotplot for genes within modules specific to neuropsychiatric disorders
# dtt
dtt <- ChEMBL_slim %>% filter(phase>=4) %>% mutate(target_number=1) %>% as.data.frame()
# vec_data
df_data <- tibble(Symbol=ls_group$`1 - neuropsychiatric`)
vec_data <- dtt %>% select(Symbol,phase) %>% distinct() %>% arrange(-phase,Symbol) %>% semi_join(df_data, by='Symbol') %>% pull(Symbol)
# DR
DR <- oRepurpose(vec_data, phase.min=4, target.max=5, reorder="none", DTT=dtt, restricted=NULL, colormap="spectral.top", zlim=c(1,4), na.color='transparent', label.size=2.5, label.color="white", x.rotate=50, size=4, legend.title='Phase', x.text.size=6, y.text.size=6)
# write into a file 'MOD_repurpose_npd.txt' under the folder 'pipe_showcase'
DR$df %>% select(Target,Disease,Drug_index,Drug) %>% arrange(Drug_index,Disease) %>% write_delim(file.path(outdir,'MOD_repurpose_npd.txt'), delim='\t')
# save into a file 'MOD_repurpose_npd.pdf' under the folder 'pipe_showcase'
gp_DR <- DR$gp + theme(legend.position="none") + scale_y_discrete(position="right",limits=rev)+ scale_x_discrete(position="top",limits=rev) + coord_flip()
ggsave(file.path(outdir,'MOD_repurpose_npd.pdf'), gp_DR, width=3.5, height=14)
# save into a file 'MOD_repurpose_npd_index.pdf' under the folder 'pipe_showcase'
library(gridExtra)
tt <- ttheme_default(base_size=6, padding=unit(c(2,2),"mm"))
tt_left <- ttheme_default(base_size=6, padding=unit(c(2,2),"mm"), core=list(fg_params=list(hjust=0,x=0.01)), colhead=list(fg_params=list(hjust=0, x=0.01)))
### two-column tables, each row only showing the first one if multiple drugs
df_index <- DR$index
df_index <- df_index %>% mutate(Drug=str_replace_all(Drug,'], .*','] +'))
tmp <- df_index %>% separate_rows(Drug, sep=', ')
n <- tmp %>% slice(floor(tmp %>% nrow()/2)) %>% pull(Drug_index)
df <- df_index %>% mutate(Drug=str_replace_all(Drug,', ','\n'))
m <- df %>% nrow()
gt1 <- df %>% slice(1:n) %>% gridExtra::tableGrob(rows=NULL,cols=c('Index','Drugs [mechanisms of action]'), theme=tt_left)
gt2 <- df %>% slice((n+1):m) %>% gridExtra::tableGrob(rows=NULL,cols=c('Index','Drugs [mechanisms of action]'), theme=tt_left)
ls_gt <- list(gt1, gt2)
gp_table <- ls_gt %>% gridExtra::marrangeGrob(ncol=2, nrow=1, as.table=FALSE)
ggsave(file.path(outdir,'MOD_repurpose_npd_index.pdf'), gp_table, width=6.5, height=7)
Figure 8.2: Dot plot showing 10 genes (x-axis) that are currently targeted by approved drugs in disease (y-axis). Targets for neuropsychiatric disorders are highlighted in bold. Dots are referenced in integers (see the next).
Figure 8.3: The information for referenced drugs and mechanisms of action. See referenced integers above.
Drug-target-disease information stored in the output file MOD_repurpose_npd.txt
above can be explored below.
Drug repurposing dotplot for genes within modules specific to inflammatory disorders
# dtt
dtt <- ChEMBL_slim %>% filter(phase>=4) %>% mutate(target_number=1) %>% as.data.frame()
# vec_data
df_data <- tibble(Symbol=ls_group$`2 - inflammatory`)
vec_data <- dtt %>% select(Symbol,phase) %>% distinct() %>% arrange(-phase,Symbol) %>% semi_join(df_data, by='Symbol') %>% pull(Symbol)
# DR
DR <- oRepurpose(vec_data, phase.min=4, target.max=5, reorder="none", DTT=dtt, restricted=NULL, colormap="spectral.top", zlim=c(1,4), na.color='transparent', label.size=2.5, label.color="white", x.rotate=50, size=4, legend.title='Phase', x.text.size=6, y.text.size=6)
# write into a file 'MOD_repurpose_ind.txt' under the folder 'pipe_showcase'
DR$df %>% select(Target,Disease,Drug_index,Drug) %>% arrange(Drug_index,Disease) %>% write_delim(file.path(outdir,'MOD_repurpose_ind.txt'), delim='\t')
# save into a file 'MOD_repurpose_ind.pdf' under the folder 'pipe_showcase'
gp_DR <- DR$gp + theme(legend.position="none") + scale_y_discrete(position="right")
ggsave(file.path(outdir,'MOD_repurpose_ind.pdf'), gp_DR, width=6.5, height=3.5)
# save into a file 'MOD_repurpose_ind_index.pdf' under the folder 'pipe_showcase'
library(gridExtra)
tt <- ttheme_default(base_size=6, padding=unit(c(2,2),"mm"))
tt_right <- ttheme_default(base_size=6, padding=unit(c(2,2),"mm"), core=list(fg_params=list(hjust=1,x=0.98)), colhead=list(fg_params=list(hjust=1, x=0.98)))
### two-column tables, each row only showing the first one if multiple drugs
df_index <- DR$index
tmp <- df_index %>% separate_rows(Drug, sep=', ')
n <- tmp %>% slice(floor(tmp %>% nrow()/2)) %>% pull(Drug_index)
df <- df_index %>% mutate(Drug=str_replace_all(Drug,', ','\n'))
m <- df %>% nrow()
gt1 <- df %>% slice(1:n) %>% gridExtra::tableGrob(rows=NULL,cols=c('Index','Drugs [mechanisms of action]'), theme=tt_right)
gt2 <- df %>% slice((n+1):m) %>% gridExtra::tableGrob(rows=NULL,cols=c('Index','Drugs [mechanisms of action]'), theme=tt_right)
ls_gt <- list(gt1, gt2)
gp_table <- ls_gt %>% gridExtra::marrangeGrob(ncol=2, nrow=1, as.table=FALSE)
ggsave(file.path(outdir,'MOD_repurpose_ind_index.pdf'), gp_table, width=7, height=6)
Figure 8.4: Dot plot showing 11 genes (y-axis) that are currently targeted by approved drugs in disease (x-axis). Targets for inflammatory disorders are highlighted in bold. Dots are referenced in integers (see the next).
Figure 8.5: The information for referenced drugs and mechanisms of action. See referenced integers above.
Drug-target-disease information stored in the output file MOD_repurpose_ind.txt
above can be explored below.
# disorder-specific modules
df_group <- df_modules %>% mutate(group=ifelse(modules %in% c('M1','M2','M4'), '1 - neuropsychiatric', '2 - inflammatory')) %>% select(name,group)
ls_group <- df_group %>% nest(data=-group) %>% mutate(gene=map(data,~pull(.x,name))) %>% select(-data) %>% deframe()
# sets
sets <- tibble(onto='PSG') %>% mutate(set=map(onto,~oRDS(str_c("org.Hs.eg",.x),placeholder=placeholder)))
# enrichment analysis
esad <- oSEAadv(ls_group, sets, size.range=c(0,50000), min.overlap=0, background=background)
# forest plot
df_output <- esad %>% oSEAextract() %>% arrange(onto,distance) %>% filter(onto=='PSG')
gp_forest <- df_output %>% oSEAforest(top=100, adjp.cutoff=1.01, zlim=NULL, colormap='grey50-orange-darkred', legend.direction="horizontal", sortBy="none")
# save into a file 'MOD_PSG_forest.pdf' under the folder 'pipe_showcase'
ggsave(file.path(outdir,'MOD_PSG_forest.pdf'), gp_forest, width=7, height=3.5)
# write into a file 'MOD_PSG_forest.txt' under the folder 'pipe_showcase'
df_output %>% select(group,name,zscore,adjp,or,CIl,CIu,nO,overlap) %>% arrange(group,name,adjp) %>% write_delim(file.path(outdir,'MOD_PSG_forest.txt'), delim='\t')
Figure 9.1: Forest plot of phylostrata enrichments. A phylostratum contains a group of genes that were first created at this phylostratum (defined as gene evolutionary age), dated through genomic phylostratigraphy (PSG). Plylostrata are ordered according to evolutionary history, from the earliest (PSG01) to the most recent (PSG16). Also listed are genes first created at each enriched phylostratum.
Enriched phylostrata stored in the output file MOD_PSG_forest.txt
above can be explored below.
# levels.customised
levels.customised <- df_output %>% filter(group=='2 - inflammatory') %>% separate_rows(overlap,sep=', ') %>% filter(overlap!='') %>% arrange(name,overlap) %>% distinct(overlap) %>% pull(overlap)
# upset plot
gp_PSG_inflam <- df_output %>% filter(group=='2 - inflammatory') %>% oSEAupset(top=NULL, adjp.cutoff=1, color="orange", shape=18, size.range=c(1,5), member.levels=c("auto","num","customised")[3], levels.customised=levels.customised, sortBy="name")
# save into a file 'MOD_PSG_upset.pdf' under the folder 'pipe_showcase'
ggsave(file.path(outdir,'MOD_PSG_upset.pdf'), gp_PSG_inflam, width=9, height=6.5)
Figure 9.2: Enrichment analysis using phylostrata reveals evolutionary origins of genes in modules specific to inflammatory modules. Kite plot of enriched phylostrata (right panel), with created genes per phylostratum as indicated in blue dots (left panel). Dated through genomic phylostratigraphy (PSG), phylostrata are coded from the earliest (PSG01) to the most recent (PSG16). A phylostratum contains a group of genes that were first created at this phylostratum.