1 Overview

In this demo, we illustrate how to examine the modular structure of a network by merging both pathway crosstalks (see the demos neuropsychiatric disorders and inflammatory disorders), identifying functional modules with therapeutic potentials (enriched for approved drug targets) and evolutionary origins (co-arising of inflammatory disorder-specific modules with the speciation of bony fish).

2 R and Package installation

# Please install R (version 4.3.2 or above)
# see https://cran.r-project.org

# if the package 'BiocManager' not installed, please do so
if(!("BiocManager" %in% rownames(installed.packages()))) install.packages("BiocManager")

# first, install basic packages: remotes, tidyverse, pbapply, patchwork
BiocManager::install(c('remotes','tidyverse','pbapply','patchwork'), dependencies=T)

# then, install the package 'PIPE' (now hosted at github)
BiocManager::install("hfang-bristol/PIPE", dependencies=T, force=T)

# check the package 'PIPE' successfully installed
library(PIPE)

3 Package loading and built-in data placeholder

# load packages
library(PIPE)
library(tidyverse)
library(patchwork)

# specify built-in data placeholder
placeholder <- "http://www.comptransmed.pro/bigdata_pipe"

# create the output folder 'pipe_showcase'
outdir <- 'pipe_showcase'
if(!dir.exists(outdir)) dir.create(outdir)

4 Merging pathway crosstalks

Load results for neuropsychiatric disorders (NPD) generated in the demo neuropsychiatric disorders, including target gene prioritisation (dTarget_NPD.RData) and pathway crosstalk (subg_NPD.RData).

Load results for inflammatory disorders (IND) generated in the demo inflammatory disorders, including target gene prioritisation (dTarget_IND.RData) and pathway crosstalk (subg_IND.RData).

# load results for neuropsychiatric disorders (NPD)
## target gene prioritisation 
dTarget_NPD <- load(file.path(outdir,'dTarget_NPD.RData')) %>% get()
## pathway crosstalk
subg_NPD <- load(file.path(outdir,'subg_NPD.RData')) %>% get()

# load results for inflammatory disorders (IND)
## target gene prioritisation
dTarget_IND <- load(file.path(outdir,'dTarget_IND.RData')) %>% get()
## pathway crosstalk
subg_IND <- load(file.path(outdir,'subg_IND.RData')) %>% get()

# merge both pathway crosstalks
subg_both <-  list(subg_IND,subg_NPD) %>% oCombineNet() %>% delete_vertex_attr("priority") %>% oLayout("graphlayouts.layout_with_stress")

5 Network module analysis

# identify modules
set.seed(825)
V(subg_both)$modules <- subg_both %>% cluster_spinglass(spins=7) %>% membership()
table(V(subg_both)$modules)

# reorder by the module size (optional)
df <- V(subg_both)$modules %>% enframe()
df1 <- df %>% count(value) %>% mutate(new=rank(n,ties.method="first"))
df <- df %>% inner_join(df1, by='value') %>% transmute(name, value=new)
V(subg_both)$modules <- deframe(df)
table(V(subg_both)$modules)

# visualise modules
## add coordinates (within and between modules)
subg_both <- subg_both %>% oAddCoords("modules", glayout=layout_with_kk, edge.color.alternative=c("grey70","grey95"))
## do visualisation
gg <- subg_both %>% oGGnetwork(node.xcoord="xcoord", node.ycoord="ycoord", node.color="modules", colormap="sci_locuszoom", node.color.alpha=0.5, node.size.range=3, edge.color="color", edge.arrow.gap=0)
## also add gene label
gg_label <- gg + ggrepel::geom_text_repel(data=gg$data %>% distinct(x,y,name), aes(x,y,label=name), color='black', force=2, size=1.8, alpha=0.7, show.legend=F, segment.alpha=0.5, segment.color="grey70", segment.size=0.2, seed=825, max.overlaps=Inf)

# save into a file 'MOD_module.pdf' under the folder 'pipe_showcase'
ggsave(file.path(outdir,'MOD_module.pdf'), gg, width=6, height=6)

# save into a file 'MOD_module2.pdf' under the folder 'pipe_showcase'
ggsave(file.path(outdir,'MOD_module2.pdf'), gg_label, width=6, height=6)

# write into a file 'MOD_module.txt' under the folder 'pipe_showcase'
ig <- subg_both
ind <- match(V(ig)$name, dTarget_NPD$priority$name)
V(ig)$NPD_priority <- dTarget_NPD$priority$rating[ind]
ind <- match(V(ig)$name, dTarget_IND$priority$name)
V(ig)$IND_priority <- dTarget_IND$priority$rating[ind]
oIG2TB(ig,'nodes') %>% transmute(name, modules=str_c('M',modules), NPD_priority, IND_priority, description) %>% arrange(modules,name) %>% write_delim(file.path(outdir,'MOD_module.txt'), delim='\t')

Figure 5.1: Modular visualisation of a network merging two pathway crosstalks. Modules are color-coded as indicated, with member genes also color-coded and labelled.

Modular visualisation of a network merging two pathway crosstalks. Modules are color-coded as indicated, with member genes also color-coded and labelled.

The module information stored in the output file MOD_module.txt above can be explored below.

6 Functional characterisation for modules

# extract genes per module
df_modules <- subg_both %>% igraph::as_data_frame("vertices") %>% as_tibble() %>% transmute(name, modules=str_c('M',modules)) %>% arrange(modules)

# reveal functional modules
## ls_modules
ls_modules <- df_modules %>% select(name,modules) %>% nest(data=-modules) %>% mutate(gene=map(data,~pull(.x,name))) %>% select(-data) %>% deframe()
## background
background <- df_modules %>% pull(name)
## sets
sets <- tibble(onto='KEGG') %>% mutate(set=map(onto,~oRDS(str_c("org.Hs.eg",.x),placeholder=placeholder)))
## esad
esad <- oSEAadv(ls_modules, sets, background=background, size.range=c(10,500), min.overlap=5)

# balloon plot
## top two enrichments per module
gp <- esad %>% oSEAextract() %>% filter(adjp<1e-2, namespace %in% 'Environmental Process') %>% group_by(group) %>% top_n(2,-adjp) %>% oSEAballoon(top=5, adjp.cutoff=0.05, zlim=c(0,15), slim=c(0,20), size.range=c(1,6), shape=19, colormap="grey90-orange-darkred")
gp_kegg <- gp + theme(axis.text.y=element_text(size=6),axis.text.x=element_text(size=6,angle=0),strip.text.y=element_text(size=6,angle=0))

# save into a file 'MOD_module_kegg.pdf' under the folder 'pipe_showcase'
ggsave(file.path(outdir,'MOD_module_kegg.pdf'), gp_kegg, width=5, height=2)

# write into a file 'MOD_module_kegg.txt' under the folder 'pipe_showcase'
esad %>% oSEAextract() %>% filter(adjp<1e-2, namespace %in% 'Environmental Process') %>% group_by(group) %>% top_n(2,-adjp) %>% arrange(group,adjp) %>% write_delim(file.path(outdir,'MOD_module_kegg.txt'), delim='\t')

Figure 6.1: KEGG enrichment analysis for genes within each of five modules (M1 – M5).

Enriched pathways stored in the output file MOD_module_kegg.txt above can be explored below.

7 Disorder-specific illustration of functional modules

# neuropsychiatric disorder: gp_rating_npd
ig <- subg_both
ind <- match(V(ig)$name, dTarget_NPD$priority$name)
V(ig)$priority <- dTarget_NPD$priority$rating[ind]
gp_rating_npd <- oGGnetwork(ig, node.label='label', node.label.size=1, node.label.color='black', node.label.alpha=0.8, node.label.padding=0.1, node.label.arrow=0, node.label.force=0.01, node.shape=19, node.xcoord='xcoord', node.ycoord='ycoord', node.color='priority', node.color.title='Credit\nscore', colormap='spectral', zlim=c(0,10), node.color.alpha=0.9, node.size.range=3, edge.color="color",edge.color.alpha=0.5,edge.curve=0,edge.arrow.gap=0)
gp_rating_npd  <- gp_rating_npd + ggtitle('Neuropsychiatric disorders')

# inflammatory disorder: gp_rating_ind
ig <- subg_both
ind <- match(V(ig)$name, dTarget_IND$priority$name)
V(ig)$priority <- dTarget_IND$priority$rating[ind]
gp_rating_ind <- oGGnetwork(ig, node.label='label', node.label.size=1, node.label.color='black', node.label.alpha=0.8, node.label.padding=0.1, node.label.arrow=0, node.label.force=0.01, node.shape=19, node.xcoord='xcoord', node.ycoord='ycoord', node.color='priority', node.color.title='Credit\nscore', colormap='spectral', zlim=c(0,10), node.color.alpha=0.9, node.size.range=3, edge.color="color",edge.color.alpha=0.5,edge.curve=0,edge.arrow.gap=0)
gp_rating_ind  <- gp_rating_ind + ggtitle('Inflammatory disorders')

# gp_both
gp_both <- list(gp_rating_npd,gp_rating_ind) %>% wrap_plots(ncol=2, guides='collect')

# save into a file 'MOD_module_rating.pdf' under the folder 'pipe_showcase'
ggsave(file.path(outdir,'MOD_module_rating.pdf'), gp_both, width=13, height=5)

Figure 7.1: Disorder-specific illustration of functional modules. The same network layout is shown above but nodes are color-coded according to credit scores in neuropsychiatric disorders (left panel) and in inflammatory disorders (right panel).

Disorder-specific illustration of functional modules. The same network layout is shown above but nodes are color-coded according to credit scores in neuropsychiatric disorders (left panel) and in inflammatory disorders (right panel).

8 Repurposing opportunities

8.1 Enrichment analysis

# disorder-specific modules
df_group <- df_modules %>% mutate(group=ifelse(modules %in% c('M1','M2','M4'), '1 - neuropsychiatric', '2 - inflammatory')) %>% select(name,group) 
ls_group <- df_group %>% nest(data=-group) %>% mutate(gene=map(data,~pull(.x,name))) %>% select(-data) %>% deframe()

# per target, keep the maximum phased drugs only (along with disease indications)
ChEMBL_slim <- oRDS("ChEMBL_slim", placeholder=placeholder)

# ontology annotation: two categories 'approved' and 'phased' (phases 2 and 3)
set <- ChEMBL_slim %>% filter(phase>=2) %>% select(Symbol,phase) %>% distinct() %>% mutate(phase=ifelse(phase==4,'approved','phased')) %>% nest(data=-phase) %>% mutate(data=map(data,~pull(.x))) %>% deframe()

# enrichment analysis
background <- union(dTarget_NPD$priority %>% pull(name), dTarget_IND$priority %>% pull(name))
ls_res <- lapply(1:length(ls_group), function(i){
    eset <- oSEA(ls_group[[i]], set, background=background, size.range=c(10,20000), min.overlap=3)
    eset %>% oSEAextract() %>% mutate(group=names(ls_group)[i])
})
df_res <- do.call(rbind, ls_res)

# forest plot
df_res <- df_res %>% filter(name %in% c('approved','phased')) %>% mutate(adjp=p.adjust(pvalue,method="BH"))
gp_forest <- df_res %>% oSEAforest(adjp.cutoff=1, zlim=c(0,6), colormap='grey50-orange-darkred', legend.direction="horizontal", sortBy="or")

# save into a file 'MOD_repurpose_forest.pdf' under the folder 'pipe_showcase'
ggsave(file.path(outdir,'MOD_repurpose_forest.pdf'), gp_forest, width=5, height=1.5)

# write into a file 'MOD_repurpose_forest.txt' under the folder 'pipe_showcase'
df_res %>% select(group,name,zscore,adjp,or,CIl,CIu,nO,overlap) %>% arrange(group,name,adjp) %>% write_delim(file.path(outdir,'MOD_repurpose_forest.txt'), delim='\t')

Figure 8.1: Forest plot of approved or phased drug target enrichments.

Enriched approved or phased drug target categories stored in the output file MOD_repurpose_forest.txt above can be explored below.

8.2 Repurposing analysis for neuropsychiatric disorders

Drug repurposing dotplot for genes within modules specific to neuropsychiatric disorders

# dtt
dtt <- ChEMBL_slim %>% filter(phase>=4) %>% mutate(target_number=1) %>% as.data.frame()

# vec_data
df_data <- tibble(Symbol=ls_group$`1 - neuropsychiatric`)
vec_data <- dtt %>% select(Symbol,phase) %>% distinct() %>% arrange(-phase,Symbol) %>% semi_join(df_data, by='Symbol') %>% pull(Symbol)

# DR
DR <- oRepurpose(vec_data, phase.min=4, target.max=5, reorder="none", DTT=dtt, restricted=NULL, colormap="spectral.top", zlim=c(1,4), na.color='transparent', label.size=2.5, label.color="white", x.rotate=50, size=4, legend.title='Phase', x.text.size=6, y.text.size=6)

# write into a file 'MOD_repurpose_npd.txt' under the folder 'pipe_showcase'
DR$df %>% select(Target,Disease,Drug_index,Drug) %>% arrange(Drug_index,Disease) %>% write_delim(file.path(outdir,'MOD_repurpose_npd.txt'), delim='\t')

# save into a file 'MOD_repurpose_npd.pdf' under the folder 'pipe_showcase'
gp_DR <- DR$gp + theme(legend.position="none") + scale_y_discrete(position="right",limits=rev)+ scale_x_discrete(position="top",limits=rev) + coord_flip()
ggsave(file.path(outdir,'MOD_repurpose_npd.pdf'), gp_DR, width=3.5, height=14)

# save into a file 'MOD_repurpose_npd_index.pdf' under the folder 'pipe_showcase'
library(gridExtra)
tt <- ttheme_default(base_size=6, padding=unit(c(2,2),"mm"))
tt_left <- ttheme_default(base_size=6, padding=unit(c(2,2),"mm"), core=list(fg_params=list(hjust=0,x=0.01)), colhead=list(fg_params=list(hjust=0, x=0.01)))
### two-column tables, each row only showing the first one if multiple drugs
df_index <- DR$index
df_index <- df_index %>% mutate(Drug=str_replace_all(Drug,'], .*','] +'))
tmp <- df_index %>% separate_rows(Drug, sep=', ')
n <- tmp %>% slice(floor(tmp %>% nrow()/2)) %>% pull(Drug_index)
df <- df_index %>% mutate(Drug=str_replace_all(Drug,', ','\n'))
m <- df %>% nrow()
gt1 <- df %>% slice(1:n) %>% gridExtra::tableGrob(rows=NULL,cols=c('Index','Drugs [mechanisms of action]'), theme=tt_left)
gt2 <- df %>% slice((n+1):m) %>% gridExtra::tableGrob(rows=NULL,cols=c('Index','Drugs [mechanisms of action]'), theme=tt_left)
ls_gt <- list(gt1, gt2)
gp_table <- ls_gt %>% gridExtra::marrangeGrob(ncol=2, nrow=1, as.table=FALSE)
ggsave(file.path(outdir,'MOD_repurpose_npd_index.pdf'), gp_table, width=6.5, height=7)

Figure 8.2: Dot plot showing 10 genes (x-axis) that are currently targeted by approved drugs in disease (y-axis). Targets for neuropsychiatric disorders are highlighted in bold. Dots are referenced in integers (see the next).

Dot plot showing 10 genes (x-axis) that are currently targeted by approved drugs in disease (y-axis). Targets for neuropsychiatric disorders are highlighted in bold. Dots are referenced in integers (see the next).

Figure 8.3: The information for referenced drugs and mechanisms of action. See referenced integers above.

Drug-target-disease information stored in the output file MOD_repurpose_npd.txt above can be explored below.

8.3 Repurposing analysis for inflammatory disorders

Drug repurposing dotplot for genes within modules specific to inflammatory disorders

# dtt
dtt <- ChEMBL_slim %>% filter(phase>=4) %>% mutate(target_number=1) %>% as.data.frame()

# vec_data
df_data <- tibble(Symbol=ls_group$`2 - inflammatory`)
vec_data <- dtt %>% select(Symbol,phase) %>% distinct() %>% arrange(-phase,Symbol) %>% semi_join(df_data, by='Symbol') %>% pull(Symbol)

# DR
DR <- oRepurpose(vec_data, phase.min=4, target.max=5, reorder="none", DTT=dtt, restricted=NULL, colormap="spectral.top", zlim=c(1,4), na.color='transparent', label.size=2.5, label.color="white", x.rotate=50, size=4, legend.title='Phase', x.text.size=6, y.text.size=6)

# write into a file 'MOD_repurpose_ind.txt' under the folder 'pipe_showcase'
DR$df %>% select(Target,Disease,Drug_index,Drug) %>% arrange(Drug_index,Disease) %>% write_delim(file.path(outdir,'MOD_repurpose_ind.txt'), delim='\t')

# save into a file 'MOD_repurpose_ind.pdf' under the folder 'pipe_showcase'
gp_DR <- DR$gp + theme(legend.position="none") + scale_y_discrete(position="right")
ggsave(file.path(outdir,'MOD_repurpose_ind.pdf'), gp_DR, width=6.5, height=3.5)

# save into a file 'MOD_repurpose_ind_index.pdf' under the folder 'pipe_showcase'
library(gridExtra)
tt <- ttheme_default(base_size=6, padding=unit(c(2,2),"mm"))
tt_right <- ttheme_default(base_size=6, padding=unit(c(2,2),"mm"), core=list(fg_params=list(hjust=1,x=0.98)), colhead=list(fg_params=list(hjust=1, x=0.98)))
### two-column tables, each row only showing the first one if multiple drugs
df_index <- DR$index
tmp <- df_index %>% separate_rows(Drug, sep=', ')
n <- tmp %>% slice(floor(tmp %>% nrow()/2)) %>% pull(Drug_index)
df <- df_index %>% mutate(Drug=str_replace_all(Drug,', ','\n'))
m <- df %>% nrow()
gt1 <- df %>% slice(1:n) %>% gridExtra::tableGrob(rows=NULL,cols=c('Index','Drugs [mechanisms of action]'), theme=tt_right)
gt2 <- df %>% slice((n+1):m) %>% gridExtra::tableGrob(rows=NULL,cols=c('Index','Drugs [mechanisms of action]'), theme=tt_right)
ls_gt <- list(gt1, gt2)
gp_table <- ls_gt %>% gridExtra::marrangeGrob(ncol=2, nrow=1, as.table=FALSE)
ggsave(file.path(outdir,'MOD_repurpose_ind_index.pdf'), gp_table, width=7, height=6)

Figure 8.4: Dot plot showing 11 genes (y-axis) that are currently targeted by approved drugs in disease (x-axis). Targets for inflammatory disorders are highlighted in bold. Dots are referenced in integers (see the next).

Dot plot showing 11 genes (y-axis) that are currently targeted by approved drugs in disease (x-axis). Targets for inflammatory disorders are highlighted in bold. Dots are referenced in integers (see the next).

Figure 8.5: The information for referenced drugs and mechanisms of action. See referenced integers above.

$The information for referenced drugs and mechanisms of action. See referenced integers above.$

Drug-target-disease information stored in the output file MOD_repurpose_ind.txt above can be explored below.

9 Evolutionary origins

9.1 Enrichment analysis

# disorder-specific modules
df_group <- df_modules %>% mutate(group=ifelse(modules %in% c('M1','M2','M4'), '1 - neuropsychiatric', '2 - inflammatory')) %>% select(name,group) 
ls_group <- df_group %>% nest(data=-group) %>% mutate(gene=map(data,~pull(.x,name))) %>% select(-data) %>% deframe()

# sets
sets <- tibble(onto='PSG') %>% mutate(set=map(onto,~oRDS(str_c("org.Hs.eg",.x),placeholder=placeholder)))
  
# enrichment analysis
esad <- oSEAadv(ls_group, sets, size.range=c(0,50000), min.overlap=0, background=background)

# forest plot
df_output <- esad %>% oSEAextract() %>% arrange(onto,distance) %>% filter(onto=='PSG')
gp_forest <- df_output %>% oSEAforest(top=100, adjp.cutoff=1.01, zlim=NULL, colormap='grey50-orange-darkred', legend.direction="horizontal", sortBy="none")

# save into a file 'MOD_PSG_forest.pdf' under the folder 'pipe_showcase'
ggsave(file.path(outdir,'MOD_PSG_forest.pdf'), gp_forest, width=7, height=3.5)

# write into a file 'MOD_PSG_forest.txt' under the folder 'pipe_showcase'
df_output %>% select(group,name,zscore,adjp,or,CIl,CIu,nO,overlap) %>% arrange(group,name,adjp) %>% write_delim(file.path(outdir,'MOD_PSG_forest.txt'), delim='\t')

Figure 9.1: Forest plot of phylostrata enrichments. A phylostratum contains a group of genes that were first created at this phylostratum (defined as gene evolutionary age), dated through genomic phylostratigraphy (PSG). Plylostrata are ordered according to evolutionary history, from the earliest (PSG01) to the most recent (PSG16). Also listed are genes first created at each enriched phylostratum.

Forest plot of phylostrata enrichments. A phylostratum contains a group of genes that were first created at this phylostratum (defined as gene evolutionary age), dated through genomic phylostratigraphy (PSG). Plylostrata are ordered according to evolutionary history, from the earliest (PSG01) to the most recent (PSG16). Also listed are genes first created at each enriched phylostratum.

Enriched phylostrata stored in the output file MOD_PSG_forest.txt above can be explored below.

9.2 UpSet plot for inflammatory disorders

# levels.customised
levels.customised <- df_output %>% filter(group=='2 - inflammatory') %>% separate_rows(overlap,sep=', ') %>% filter(overlap!='') %>% arrange(name,overlap) %>% distinct(overlap) %>% pull(overlap)

# upset plot
gp_PSG_inflam <- df_output %>% filter(group=='2 - inflammatory') %>% oSEAupset(top=NULL, adjp.cutoff=1, color="orange", shape=18, size.range=c(1,5), member.levels=c("auto","num","customised")[3], levels.customised=levels.customised, sortBy="name")

# save into a file 'MOD_PSG_upset.pdf' under the folder 'pipe_showcase'
ggsave(file.path(outdir,'MOD_PSG_upset.pdf'), gp_PSG_inflam, width=9, height=6.5)

Figure 9.2: Enrichment analysis using phylostrata reveals evolutionary origins of genes in modules specific to inflammatory modules. Kite plot of enriched phylostrata (right panel), with created genes per phylostratum as indicated in blue dots (left panel). Dated through genomic phylostratigraphy (PSG), phylostrata are coded from the earliest (PSG01) to the most recent (PSG16). A phylostratum contains a group of genes that were first created at this phylostratum.

Enrichment analysis using phylostrata reveals evolutionary origins of genes in modules specific to inflammatory modules. Kite plot of enriched phylostrata (right panel), with created genes per phylostratum as indicated in blue dots (left panel). Dated through genomic phylostratigraphy (PSG), phylostrata are coded from the earliest (PSG01) to the most recent (PSG16). A phylostratum contains a group of genes that were first created at this phylostratum.

Showcasing crosstalk network functional modules (MOD)