Project notebook with R¶
Easy access to all the experimental data, statistical analysis results and visualization of reports using R
All the R functionality at hand
Import R libraries¶
Make sure that all the libraries you plan to use have been previously installed.
If not, you can install them through the notebook.
E.g. install.packages("tidyverse")
[10]:
library(rhdf5)
library(rjson)
library(foreach)
library(plotly)
library(tidyverse)
library(networkD3)
Define custom functions¶
Within the Jupyter notebook you can create any functions relevant to your work that are not implemented in the analytics core. This is the case for R functions: here we define R functions that allow us to load a project from the CKG database, access its different datasets and plots, and visualize the plots in the notebook.
[11]:
#' Build an interactive force-directed network widget from a report network.
#'
#' @param json_network A parsed network entry of a report: a list whose
#'   `net_json` element holds `links` (records with `source`, `target` and
#'   `width` fields) and `nodes` (records with `id` and `color` fields).
#' @return The widget returned by `networkD3::forceNetwork()`.
importGraph <- function(json_network) {
  link_records <- json_network$net_json$links
  node_records <- json_network$net_json$nodes

  # Extract one field from every record by name. Reshaping the flattened
  # records positionally would shift values into the wrong column as soon as
  # one record lacks a field; a missing field becomes NA here instead.
  pluck_field <- function(records, key) {
    vapply(
      records,
      function(record) {
        value <- record[[key]]
        if (is.null(value)) NA_character_ else as.character(value)[1]
      },
      character(1),
      USE.NAMES = FALSE
    )
  }

  # Nodes are referenced by zero-based position; seq_along() keeps this
  # correct for an empty node list, where 0:(n - 1) would yield c(0, -1).
  nodes <- data.frame(
    id = seq_along(node_records) - 1L,
    label = pluck_field(node_records, "id"),
    color = pluck_field(node_records, "color"),
    stringsAsFactors = FALSE
  )

  # Translate the node labels used by the links into node positions. Plain
  # character vectors avoid the factor-level warnings a join on factor
  # columns raises; links pointing at unknown nodes get NA.
  sources <- pluck_field(link_records, "source")
  targets <- pluck_field(link_records, "target")
  edges <- data.frame(
    from = nodes$id[match(sources, nodes$label)],
    to = nodes$id[match(targets, nodes$label)],
    weight = as.numeric(pluck_field(link_records, "width")),
    stringsAsFactors = FALSE
  )

  # forceNetwork() hands the `Value` column to JavaScript under the name
  # `value`, so the link width has to read `d.value`; `d.weight` is undefined
  # on the link objects and would evaluate to NaN.
  networkD3::forceNetwork(
    Links = edges, Nodes = nodes,
    height = 850, width = 800,
    Source = "from", Target = "to",
    Value = "weight", NodeID = "label",
    Group = "color", opacity = 0.8, opacityNoHover = 1,
    zoom = TRUE,
    linkWidth = networkD3::JS("function(d) { return Math.sqrt(d.value); }")
  )
}
#' Read the figures and networks stored in a report HDF5 file.
#'
#' Every group of the file except the root and groups whose name contains
#' "Table" is read. Entries whose name contains "figure" are parsed as JSON
#' and their `props$figure` element is stored under "<group>_<entry>";
#' entries whose name contains "net" are parsed as JSON and stored under the
#' entry name.
#'
#' @param report_file Path to the report `.h5` file.
#' @param report_name Value stored in the `name` element of the result.
#' @return An object of class "report": a list with elements `name`, `plots`
#'   and `nets`. `plots` and `nets` are named lists, or NULL when nothing was
#'   found. A missing `report_file` gives an empty report and a warning.
read_report <- function(report_file, report_name) {
  plots <- NULL
  nets <- NULL
  if (file.exists(report_file)) {
    # Close HDF5 handles even when reading or JSON parsing fails midway.
    # Note that h5closeAll() closes every open HDF5 handle in the session.
    on.exit(rhdf5::h5closeAll(), add = TRUE)
    groups <- unique(rhdf5::h5ls(report_file)$group)
    groups <- groups[groups != "/" & !grepl("Table", groups)]
    # Plain for loops: the iterations only accumulate into `plots`/`nets`,
    # so foreach's collected return values were never used.
    for (group in groups) {
      group <- substring(group, 2) # drop the leading "/"
      entries <- rhdf5::h5read(report_file, group)
      for (name in unique(names(entries))) {
        if (grepl("figure", name)) {
          # rjson is named explicitly so that another attached package
          # exporting fromJSON() cannot change how the JSON is parsed.
          parsed <- rjson::fromJSON(entries[[name]][1])
          figure <- setNames(
            list(parsed$props$figure),
            paste(group, name, sep = "_")
          )
          plots <- append(plots, figure)
        } else if (grepl("net", name)) {
          network <- setNames(list(rjson::fromJSON(entries[[name]][1])), name)
          nets <- append(nets, network)
        }
      }
    }
  } else {
    warning("Report file not found: ", report_file, call. = FALSE)
  }
  report <- list(name = report_name, plots = plots, nets = nets)
  class(report) <- "report"
  report
}
# Convert a list of parsed JSON records into a data frame, one row per
# record. Values are placed by field name, so a record with a missing (null)
# field gets NA in that column instead of shifting the remaining values.
.records_to_data_frame <- function(records) {
  if (length(records) == 0) {
    return(data.frame())
  }
  keys_per_row <- lapply(records, function(record) {
    flat <- unlist(record)
    keys <- names(flat)
    if (is.null(keys)) {
      keys <- rep("", length(flat))
    }
    # Give unnamed values a positional column name.
    unnamed <- !nzchar(keys)
    keys[unnamed] <- paste0("X", which(unnamed))
    keys
  })
  all_keys <- unlist(keys_per_row, use.names = FALSE)
  columns <- unique(all_keys)
  cells <- matrix(
    NA,
    nrow = length(records), ncol = length(columns),
    dimnames = list(NULL, columns)
  )
  # Flatten all records at once so every value is coerced to one common type.
  values <- unlist(records, use.names = FALSE)
  if (length(values) > 0) {
    row_index <- rep(seq_along(records), lengths(keys_per_row))
    cells[cbind(row_index, match(all_keys, columns))] <- values
  }
  data.frame(cells, check.names = FALSE)
}

#' Read the data frames stored in a dataset HDF5 file.
#'
#' Every group of the file except the root is read; each entry holding a JSON
#' string is parsed into a list of records and converted to a data frame
#' stored under the entry name.
#'
#' @param dataset_file Path to the dataset `.h5` file.
#' @param dataset_name Value stored in the `name` element of the result.
#' @return An object of class "dataset": a list with elements `name` and
#'   `data`. `data` is a named list of data frames, or NULL when nothing was
#'   found. A missing `dataset_file` gives an empty dataset and a warning.
read_dataset <- function(dataset_file, dataset_name) {
  dataframes <- NULL
  if (file.exists(dataset_file)) {
    # Close HDF5 handles even when reading or JSON parsing fails midway.
    # Note that h5closeAll() closes every open HDF5 handle in the session.
    on.exit(rhdf5::h5closeAll(), add = TRUE)
    groups <- setdiff(unique(rhdf5::h5ls(dataset_file)$group), "/")
    for (group in groups) {
      group <- substring(group, 2) # drop the leading "/"
      entries <- rhdf5::h5read(dataset_file, group)
      for (name in unique(names(entries))) {
        entry <- entries[[name]]
        # Only unnamed entries are parsed as JSON strings. Named entries
        # (presumably nested groups - TODO confirm) are skipped; previously
        # they were stored with the data of the entry parsed before them.
        if (!is.null(names(entry))) {
          next
        }
        records <- rjson::fromJSON(entry[1])
        dataframe <- setNames(list(.records_to_data_frame(records)), name)
        dataframes <- append(dataframes, dataframe)
      }
    }
  } else {
    warning("Dataset file not found: ", dataset_file, call. = FALSE)
  }
  dataset <- list(name = dataset_name, data = dataframes)
  class(dataset) <- "dataset"
  dataset
}
#' Load the report and the datasets of one project data type.
#'
#' Files are looked up in `<reports_dir>/<project_id>/<dataset>/`: the report
#' in `report.h5` and the datasets in `<dataset>_dataset.h5`.
#'
#' @param project_id Project identifier, e.g. "P0000001".
#' @param dataset Data type whose report is loaded, e.g. "proteomics".
#' @param reports_dir Directory containing one sub-directory per project.
#'   Defaults to the location previously hard-coded in this function.
#' @return A list with elements `id` (the project identifier), `report` (the
#'   result of `read_report()`) and `datasets` (the result of
#'   `read_dataset()`).
load_project <- function(project_id, dataset,
                         reports_dir = "../../../data/reports") {
  dataset_dir <- file.path(reports_dir, project_id, dataset)
  report_file <- file.path(dataset_dir, "report.h5")
  datasets_file <- file.path(
    dataset_dir,
    paste(dataset, "dataset.h5", sep = "_")
  )
  report <- read_report(report_file, paste(project_id, "Report", dataset))
  datasets <- read_dataset(
    datasets_file,
    paste(project_id, "Dataset", dataset)
  )
  list(id = project_id, report = report, datasets = datasets)
}
Create a project object by loading an existing report¶
[12]:
# Load the report and datasets of project "P0000001" for the "proteomics" data type.
p = load_project("P0000001", "proteomics")
Warning message in matrix(unlist(dataframe_str), nrow = length(dataframe_str), byrow = T):
“data length [56749] is not a sub-multiple or multiple of the number of rows [17298]”
Warning message in matrix(unlist(dataframe_str), nrow = length(dataframe_str), byrow = T):
“data length [942] is not a sub-multiple or multiple of the number of rows [188]”
Warning message in matrix(unlist(dataframe_str), nrow = length(dataframe_str), byrow = T):
“data length [140] is not a sub-multiple or multiple of the number of rows [8]”
Warning message in matrix(unlist(dataframe_str), nrow = length(dataframe_str), byrow = T):
“data length [152735] is not a sub-multiple or multiple of the number of rows [26135]”
Warning message in matrix(unlist(dataframe_str), nrow = length(dataframe_str), byrow = T):
“data length [152735] is not a sub-multiple or multiple of the number of rows [26135]”
Warning message in matrix(unlist(dataframe_str), nrow = length(dataframe_str), byrow = T):
“data length [19912] is not a sub-multiple or multiple of the number of rows [6187]”
Visualize the list of plots contained in the report¶
[13]:
# List the names under which the report's plots were stored.
names(p$report$plots)
- '0~proteomics_pipeline~cytoscape_network_0_figure'
- '11~stratification_description~description_0_figure'
- '12~stratification_pca~pca_0_figure'
- '13~regulation_description~description_0_figure'
- '15~regulation_samr~volcanoplot_0_figure'
- '15~regulation_samr~volcanoplot_1_figure'
- '15~regulation_samr~volcanoplot_2_figure'
- '15~regulation_samr~volcanoplot_3_figure'
- '15~regulation_samr~volcanoplot_4_figure'
- '15~regulation_samr~volcanoplot_5_figure'
- '15~regulation_samr~volcanoplot_6_figure'
- '15~regulation_samr~volcanoplot_7_figure'
- '15~regulation_samr~volcanoplot_8_figure'
- '15~regulation_samr~volcanoplot_9_figure'
- '22~literature_associations_publications_abstracts~wordcloud_0_figure'
- '2~peptides~barplot_0_figure'
- '4~proteins~barplot_0_figure'
- '6~modifications~facetplot_0_figure'
- '8~coefficient_variation_coefficient_of_variation~scatterplot_matrix_0_figure'
- '9~ranking_ranking_with_markers~ranking_0_figure'
Access a specific plot and use plotly to visualize it¶
[18]:
# Pass the stored "2~peptides~barplot_0_figure" figure to plotly_build() to display it.
plotly_build(p$report$plots$`2~peptides~barplot_0_figure`)
Networks have to be converted from JSON format to an edge list. We use R’s networkD3 package to create a D3 JavaScript force-directed network graph from it.¶
[15]:
# Turn the stored "0_net" network into a force-directed graph with importGraph().
importGraph(p$report$nets$`0_net`)
Warning message:
“Column `source`/`label` joining factors with different levels, coercing to character vector”
Warning message:
“Column `target`/`label` joining factors with different levels, coercing to character vector”
We can also easily access the different datasets from the project¶
[16]:
# List the names of the data frames loaded for the project.
names(p$datasets$data)
- 'complex_associations'
- 'correlation_correlation'
- 'disease_associations'
- 'drug_associations'
- 'go annotation'
- 'go_enrichment_Biological_processes_regulation_enrichment'
- 'interaction_network'
- 'literature_associations_publications_abstracts'
- 'number of modified proteins'
- 'number of peptides'
- 'number of proteins'
- 'original'
- 'overview statistics_summary'
- 'pathway annotation'
- 'pathway_enrichment_Pathways_regulation_enrichment'
- 'processed'
- 'protein biomarkers'
- 'regulated'
- 'regulation table'
- 'Data Matrix Shape'
- 'Stats'
[17]:
# Display the "correlation_correlation" data frame.
p$datasets$data$correlation_correlation
node1 | node2 | weight | pvalue | padj | rejected |
---|---|---|---|---|---|
<fct> | <fct> | <fct> | <fct> | <fct> | <fct> |
A30~A2MYE2 | A2M~P01023 | 0.2892365267 | 0.0 | 0.0 | TRUE |
ABI3BP~Q7Z7G0 | A30~A2MYE2 | -0.3129542643 | 0.0 | 0.0 | TRUE |
ACE~P12821 | A2M~P01023 | 0.2904851136 | 0.00059052 | 0.00164552 | TRUE |
ACE~P12821 | ABI3BP~Q7Z7G0 | 0.0026333708 | 3e-08 | 2e-07 | TRUE |
ACTB~P60709 | A2M~P01023 | -0.1503252993 | 1.4e-07 | 8.5e-07 | TRUE |
ACTB~P60709 | ABI3BP~Q7Z7G0 | -0.1213572078 | 0.00959561 | 0.01965832 | TRUE |
ACTB~P60709 | ACE~P12821 | -0.0120569051 | 0.00172883 | 0.00428982 | TRUE |
ACTN1~P12814 | A30~A2MYE2 | -0.0953185874 | 4.137e-05 | 0.00014992 | TRUE |
ACTN1~P12814 | ABI3BP~Q7Z7G0 | 0.2159122385 | 0.0 | 0.0 | TRUE |
ACTN1~P12814 | ACE~P12821 | 0.0337919091 | 0.00146044 | 0.00369332 | TRUE |
ACTN1~P12814 | ACTB~P60709 | 0.2556916718 | 2.296e-05 | 8.778e-05 | TRUE |
ADA2~Q9NZK5 | A2M~P01023 | 0.2405633762 | 0.0 | 1e-08 | TRUE |
ADA2~Q9NZK5 | ABI3BP~Q7Z7G0 | -0.1378733998 | 0.0068122 | 0.01452743 | TRUE |
ADA2~Q9NZK5 | ACTB~P60709 | 0.0488541846 | 0.00228905 | 0.00551185 | TRUE |
ADA2~Q9NZK5 | ACTN1~P12814 | 0.1606489207 | 9.2e-07 | 4.71e-06 | TRUE |
ADAMTS13~Q76LX8 | A2M~P01023 | 0.228562439 | 1.3e-07 | 7.7e-07 | TRUE |
ADAMTS13~Q76LX8 | ACE~P12821 | 0.1100432326 | 9.62e-06 | 3.996e-05 | TRUE |
ADAMTS13~Q76LX8 | ACTB~P60709 | -0.1298491445 | 0.0 | 0.0 | TRUE |
ADAMTS13~Q76LX8 | ADA2~Q9NZK5 | 0.0882539027 | 0.0 | 0.0 | TRUE |
ADAMTSL4~Q6UY14 | A2M~P01023 | -0.0167111069 | 0.0258627 | 0.0469946 | TRUE |
ADAMTSL4~Q6UY14 | ABI3BP~Q7Z7G0 | -0.0297828179 | 2.1e-07 | 1.21e-06 | TRUE |
ADAMTSL4~Q6UY14 | ACE~P12821 | 0.1867241047 | 2e-08 | 1.1e-07 | TRUE |
ADAMTSL4~Q6UY14 | ACTB~P60709 | 0.0422307278 | 0.0 | 0.0 | TRUE |
ADAMTSL4~Q6UY14 | ACTN1~P12814 | 0.167608862 | 0.0 | 0.0 | TRUE |
ADAMTSL4~Q6UY14 | ADA2~Q9NZK5 | -0.0456140059 | 3.96e-06 | 1.779e-05 | TRUE |
ADH4~P08319 | A2M~P01023 | -0.1135971714 | 0.01333768 | 0.02626505 | TRUE |
ADH4~P08319 | A30~A2MYE2 | 0.0504134162 | 0.0 | 0.0 | TRUE |
ADH4~P08319 | ACE~P12821 | 0.065413239 | 0.0 | 0.0 | TRUE |
ADH4~P08319 | ACTB~P60709 | -0.0805120054 | 0.0 | 0.0 | TRUE |
ADH4~P08319 | ACTN1~P12814 | 0.0217710049 | 0.0 | 0.0 | TRUE |
⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
scFv~Q65ZC9 | SERPINA5~P05154 | -0.113307701 | 1.005e-05 | 4.157e-05 | TRUE |
scFv~Q65ZC9 | SERPINA7~P05543 | 0.0029215059 | 0.00655886 | 0.01404733 | TRUE |
scFv~Q65ZC9 | SERPINF1~P36955 | -0.0358642698 | 0.0 | 0.0 | TRUE |
scFv~Q65ZC9 | SERPING1~P05155 | -0.4645053826 | 0.01186057 | 0.02368813 | TRUE |
scFv~Q65ZC9 | SHBG~P04278 | 0.0597719401 | 5.92e-06 | 2.568e-05 | TRUE |
scFv~Q65ZC9 | SNCA~P37840 | 0.0526730489 | 0.0 | 0.0 | TRUE |
scFv~Q65ZC9 | SOD3~P08294 | 0.065714844 | 0.00134567 | 0.00343099 | TRUE |
scFv~Q65ZC9 | SPARCL1~Q14515 | 0.2050409349 | 4e-08 | 2.6e-07 | TRUE |
scFv~Q65ZC9 | SPINK5~Q9NQ38 | 0.0993925633 | 0.0 | 0.0 | TRUE |
scFv~Q65ZC9 | SPP2~Q13103 | 0.1951832721 | 0.00019287 | 0.00060287 | TRUE |
scFv~Q65ZC9 | STMN1~P16949 | -0.1916459118 | 0.0 | 0.0 | TRUE |
scFv~Q65ZC9 | THBS1~P07996 | -0.0833391798 | 1.277e-05 | 5.16e-05 | TRUE |
scFv~Q65ZC9 | THBS4~P35443 | -0.010593396 | 0.00043177 | 0.00124437 | TRUE |
scFv~Q65ZC9 | TKT~P29401 | -0.244463802 | 0.00138887 | 0.00353092 | TRUE |
scFv~Q65ZC9 | TNC~A0A024R884 | 0.2552310889 | 0.0 | 0.0 | TRUE |
scFv~Q65ZC9 | TNN~Q9UQP3 | -0.1075661031 | 0.00242762 | 0.00580852 | TRUE |
scFv~Q65ZC9 | TNXB~P22105 | 0.2817641092 | 3.13e-05 | 0.00011634 | TRUE |
scFv~Q65ZC9 | TPI1~P60174 | -0.2298225046 | 0.0 | 1e-08 | TRUE |
scFv~Q65ZC9 | V1-13~Q5NV69 | 0.1054214299 | 0.00306398 | 0.00714884 | TRUE |
scFv~Q65ZC9 | V2-13~Q5NV73 | 0.4771154135 | 2e-08 | 1.3e-07 | TRUE |
scFv~Q65ZC9 | V4-2~Q5NV82 | 0.2165834919 | 1.139e-05 | 4.654e-05 | TRUE |
scFv~Q65ZC9 | V5-2~A2MYC8 | 0.1808639979 | 0.0 | 0.0 | TRUE |
scFv~Q65ZC9 | V5-4~Q5NV79 | -0.0853365176 | 6.394e-05 | 0.0002221 | TRUE |
scFv~Q65ZC9 | VASN~Q6EMK4 | 0.1403713766 | 4e-08 | 2.9e-07 | TRUE |
scFv~Q65ZC9 | VCAM1~P19320 | -0.0542730041 | 0.00915923 | 0.01886712 | TRUE |
scFv~Q65ZC9 | VH6DJ~A2N0T4 | 0.2625776727 | 8e-08 | 5e-07 | TRUE |
scFv~Q65ZC9 | VIM~P08670 | 0.0453630504 | 0.00012994 | 0.00042167 | TRUE |
scFv~Q65ZC9 | VK3~A2N2F4 | 0.1745551542 | 0.02251528 | 0.04162447 | TRUE |
scFv~Q65ZC9 | VNN1~O95497 | -0.2329496712 | 0.01174417 | 0.02348694 | TRUE |
scFv~Q65ZC9 | YWHAZ~P63104 | 0.2255080844 | 2.1e-07 | 1.21e-06 | TRUE |
[ ]: