In this demo, we illustrate how to employ a supra-hexagonal map, constructing a prioritisation map based on genes in two crosstalks (see the demos neuropsychiatric disorders and inflammatory disorders), and how to integrate druggable pocket data identifying structurally targetable genes.
# Please install R (version 4.3.2 or above)
# see https://cran.r-project.org
# if the package 'BiocManager' not installed, please do so
if(!("BiocManager" %in% rownames(installed.packages()))) install.packages("BiocManager")
# first, install basic packages: remotes, tidyverse
BiocManager::install(c('remotes','tidyverse'), dependencies=T)
# then, install the package 'PIPE' (now hosted at github)
BiocManager::install("hfang-bristol/PIPE", dependencies=T, force=T)
# check the package 'PIPE' successfully installed
library(PIPE)
# load packages
library(PIPE)
library(supraHex)
library(tidyverse)
# specify built-in data placeholder
placeholder <- "http://www.comptransmed.pro/bigdata_pipe"
# create the output folder 'pipe_showcase'
outdir <- 'pipe_showcase'
if(!dir.exists(outdir)) dir.create(outdir)
Load results for neuropsychiatric disorders (NPD) generated in the demo neuropsychiatric disorders, including target gene prioritisation (dTarget_NPD.RData
) and pathway crosstalk (subg_NPD.RData
).
Load results for inflammatory disorders (IND) generated in the demo inflammatory disorders, including target gene prioritisation (dTarget_IND.RData
) and pathway crosstalk (subg_IND.RData
).
# load results for neuropsychiatric disorders (NPD)
## target gene prioritisation
dTarget_NPD <- load(file.path(outdir,'dTarget_NPD.RData')) %>% get()
## pathway crosstalk
subg_NPD <- load(file.path(outdir,'subg_NPD.RData')) %>% get()
# load results for inflammatory disorders (IND)
## target gene prioritisation
dTarget_IND <- load(file.path(outdir,'dTarget_IND.RData')) %>% get()
## pathway crosstalk
subg_IND <- load(file.path(outdir,'subg_IND.RData')) %>% get()
# a matrix for genes in two systems of disorders
df_priority_NPD <- dTarget_NPD$priority %>% as_tibble() %>% mutate(trait='NPD')
df_priority_IND <- dTarget_IND$priority %>% as_tibble() %>% mutate(trait='IND')
df_priority <- rbind(df_priority_NPD, df_priority_IND)
mat_priority <- df_priority %>% select(name, trait, rating) %>% pivot_wider(names_from=trait, values_from=rating, values_fill=0) %>% column_to_rownames('name')
# restricted to pathway crosstalk genes
vec <- union(V(subg_NPD)$name, V(subg_IND)$name)
ind <- match(vec, rownames(mat_priority))
df_crosstalk_both <- mat_priority[ind,]
data <- df_crosstalk_both
sMap <- sPipeline(data, scaling=3, finetuneSustain=T)
sBase <- sDmatCluster(sMap)
df_suprahex <- sWriteData(sMap, data, sBase, keep.data=T) %>% as_tibble() %>% transmute(Target=ID, Index=str_c('H',Hexagon_index), Cluster=str_c('C',Cluster_base), NPD, IND)
# write into a file 'MAP_suprahex_base.txt' under the folder 'pipe_showcase'
df_suprahex %>% arrange(Cluster,Index,Target) %>% write_delim(file.path(outdir,'MAP_suprahex_base.txt'), delim='\t')
Prioritisation map in the output file MAP_suprahex_base.txt
above can be explored below.
SIFTS_fpocket <- oRDS('SIFTS_fpocket',placeholder=placeholder)
# additional data
additional <- SIFTS_fpocket %>% filter(druggable=='Y') %>% select(Symbol, druggable) %>% distinct()
additional <- data %>% as_tibble(rownames='gene') %>% select(gene) %>% left_join(additional, by=c('gene'='Symbol')) %>% mutate(druggable=ifelse(is.na(druggable),0,1)) %>% pull(druggable)
# sOverlay
sOverlay <- sMapOverlay(sMap, data, additional)
# polar barplot
gp_polar <- df_suprahex %>% mutate(targetable=additional) %>% group_by(Cluster) %>% summarise(val=mean(targetable)) %>% oPolarBar(colormap='sci_locuszoom', size.name=10, size.value=2.5, parallel=F)
# save into a file 'MAP_suprahex_targetable.pdf' under the folder 'pipe_showcase'
ggsave(file.path(outdir,'MAP_suprahex_targetable.pdf'), gp_polar, width=5, height=5)
Figure 6.1: Polar bar showing the percentage of targetable genes per cluster. A gene is defined to be targetable if predicted to have drug-like binding sites (that is, druggable pockets) based on its known protein structure(s).
# mat_fpocket: a matrix of PDB x gene containing the number of pockets
df_fpocket <- SIFTS_fpocket %>% filter(druggable=='Y') %>% group_by(Symbol,PDB_code) %>% summarise(num_pockets=n()) %>% arrange(Symbol,desc(num_pockets),PDB_code) %>% ungroup()
mat_fpocket <- df_fpocket %>% pivot_wider(names_from=Symbol, values_from=num_pockets) %>% column_to_rownames('PDB_code')
## remove those genes with PDB numbers >=10
ind <- which(apply(!is.na(mat_fpocket),2,sum)<=10)
mat_fpocket <- mat_fpocket[,ind]
# genes in C3
vec_genes <- df_suprahex %>% filter(Cluster=='C3') %>% pull(Target)
# a matrix for C3
ind <- match(vec_genes, colnames(mat_fpocket))
data_matrix <- mat_fpocket[,ind[!is.na(ind)]]
## remove those PDBs that are empty across genes
ind <- which(apply(!is.na(data_matrix), 1, sum)!=0)
data_matrix <- data_matrix[ind,]
# heatmap
gp_pdb <- oHeatmap(t(data_matrix), reorder="col", colormap="brewer.Blues", zlim=c(0,4), x.rotate=90, shape=19, size=2, x.text.size=6,y.text.size=5, na.color='transparent', legend.title='# pockets', barheight=4)
# save into a file 'MAP_suprahex_C3_pdb.pdf' under the folder 'pipe_showcase'
ggsave(file.path(outdir,'MAP_suprahex_C3_pdb.pdf'), gp_pdb, width=6, height=2)
# write into a file 'MAP_suprahex_C3_pdb.txt' under the folder 'pipe_showcase'
df_symbol_desc <- SIFTS_fpocket %>% distinct(Symbol,Description)
data_matrix %>% as_tibble(rownames='pdb') %>% pivot_longer(-pdb, names_to='gene', values_to='num', values_drop_na=T) %>% inner_join(df_symbol_desc, by=c('gene'='Symbol')) %>% transmute(Target=gene, Description, PDB=pdb, Number=num) %>% arrange(Target,PDB) %>% write_delim(file.path(outdir,'MAP_suprahex_C3_pdb.txt'), delim='\t')
Figure 7.1: Druggable pockets for genes in C3. Dot plot shows 15 targetable genes (y-axis) and their PDB known protein structures (x-axis). Color-coded is the number of druggable pockets predicted based on PDB structures.
Druggable pockets in the output file MAP_suprahex_C3_pdb.txt
above can be explored below.