Project notebook with R

  • Easy access to all the experimental data, statistical analysis results and visualization of reports using R

  • All the R functionality at hand

Import R libraries

Make sure that all the libraries you plan to use have been previously installed.

If not, you can install them through the notebook.

E.g. install.packages("tidyverse")


Define custom functions

Within the Jupyter notebook you can create all the functions relevant to your work that are not implemented in the analytics core, such as the R functions below. Here we define R functions that allow us to load a project from the CKG database, access its datasets and plots, and visualize the plots in the notebook.

#' Convert a JSON-decoded network into a networkD3 force-directed graph
#'
#' @param json_network A list with a `net_json` element that holds `links` and
#'   `nodes`, each a list of records (one record per link / node).
#' @return The widget returned by `forceNetwork()`.
importGraph <- function(json_network){
  link_records <- json_network$net_json$links
  node_records <- json_network$net_json$nodes

  # Flatten the link records into one row per link.
  edges_df <- data.frame(matrix(unlist(link_records),
                                nrow = length(link_records), byrow = TRUE))
  # A matrix-built data frame only has X1..Xn column names, so the by-name
  # selection below needs the record field names restored first.
  # NOTE(review): assumes every record has the same fields, in the same order,
  # as the first record -- confirm against the stored JSON.
  colnames(edges_df) <- names(link_records[[1]])
  edges_df <- edges_df[c("source", "target", "width")]
  colnames(edges_df) <- c("source", "target", "weight")

  nodes <- data.frame(matrix(unlist(node_records),
                             nrow = length(node_records), byrow = TRUE))
  colnames(nodes) <- names(node_records[[1]])
  # Zero-based position of each node; links are re-expressed with these
  # indices below. seq_len() keeps this correct for an empty node list.
  nodes["ident"] <- seq_len(nrow(nodes)) - 1L
  nodes <- nodes[c("ident", "id", "color")]
  colnames(nodes) <- c("id", "label", "color")

  # Replace the node labels used in the link table by the numeric indices.
  edges <- edges_df %>%
    left_join(nodes, by = c("source" = "label")) %>%
    rename(from = id) %>%
    left_join(nodes, by = c("target" = "label")) %>%
    rename(to = id)
  edges <- edges[c("from", "to", "weight")]

  # NOTE(review): the linkWidth callback reads `d.weight`; check that
  # forceNetwork exposes the Value column under that name (it may be
  # `d.value`), otherwise the link width is not applied.
  forceNetwork(Links = edges, Nodes = nodes,
               height = 850, width = 800,
               Source = "from", Target = "to",
               Value = "weight", NodeID = "label",
               Group = "color", opacity = 0.8, opacityNoHover = 1,
               zoom = TRUE,
               linkWidth = JS("function(d) { return Math.sqrt(d.weight); }"))
}

#' Read the figures and networks stored in a report HDF5 file
#'
#' @param report_file Path to the report `.h5` file.
#' @param report_name Name stored in the returned object.
#' @return An object of class "report": a list with `name`, `plots` (named by
#'   `<group>_<entry>`) and `nets` (named by entry). `plots` and `nets` are
#'   NULL when the file does not exist or holds no matching entries.
read_report <- function(report_file, report_name){
  plots <- NULL
  nets <- NULL
  # A missing report file yields an empty report instead of an error.
  if (file.exists(report_file)) {
    content <- h5ls(report_file)
    for (group in unique(content$group)) {
      # Skip the root group and every group holding a table.
      if (group == "/" || grepl("Table", group)) {
        next
      }
      group <- substring(group, 2)  # drop the leading "/"
      report_figure <- h5read(report_file, group)
      for (name in unique(names(report_figure))) {
        if (grepl("figure", name)) {
          # NOTE(review): the figure entry is decoded the same way as the
          # "net" entries below -- confirm against the stored format.
          report_figure_str <- fromJSON(report_figure[[name]][1])
          p <- report_figure_str$props$figure
          plot <- list(plot = p)
          names(plot) <- paste(group, name, sep = "_")
          plots <- append(plots, plot)
        } else if (grepl("net", name)) {
          report_net_str <- fromJSON(report_figure[[name]][1])
          net <- list(net = report_net_str)
          # Networks are keyed by entry name only, without the group.
          names(net) <- name
          nets <- append(nets, net)
        }
      }
    }
  }
  report <- list(name = report_name, plots = plots, nets = nets)
  class(report) <- "report"
  report
}


#' Read the data frames stored in a dataset HDF5 file
#'
#' @param dataset_file Path to the dataset `.h5` file.
#' @param dataset_name Name stored in the returned object.
#' @return An object of class "dataset": a list with `name` and `data`, where
#'   `data` holds one data frame per stored entry, named by entry. `data` is
#'   NULL when the file does not exist or holds no entries.
read_dataset <- function(dataset_file, dataset_name){
  dataframes <- NULL
  # A missing dataset file yields an empty dataset instead of an error.
  if (file.exists(dataset_file)) {
    content <- h5ls(dataset_file)
    for (group in unique(content$group)) {
      if (group == "/") {
        next
      }
      group <- substring(group, 2)  # drop the leading "/"
      dataframe_json <- h5read(dataset_file, group)
      for (name in unique(names(dataframe_json))) {
        # NOTE(review): assumes each entry is a JSON string holding a list of
        # records, decoded like the report entries -- confirm.
        dataframe_str <- fromJSON(dataframe_json[[name]][1])
        # NOTE(review): unlist() drops NULL fields, so records with missing
        # values make the matrix ragged ("data length is not a sub-multiple"
        # warnings) and can shift values between columns -- verify the result
        # for datasets that contain missing values.
        df <- data.frame(matrix(unlist(dataframe_str),
                                nrow = length(dataframe_str), byrow = TRUE))
        dataframe <- list(data = df)
        names(dataframe) <- name
        dataframes <- append(dataframes, dataframe)
      }
    }
  }
  dataset <- list(name = dataset_name, data = dataframes)
  class(dataset) <- "dataset"
  dataset
}


#' Load the report and datasets of one data type of a project
#'
#' @param project_id Project identifier, used as the directory name under
#'   `../../../data/reports`.
#' @param dataset Data type sub-directory (e.g. "proteomics"); also the prefix
#'   of the `<dataset>_dataset.h5` file.
#' @return A list with `id`, `report` (result of `read_report()`) and
#'   `datasets` (result of `read_dataset()`).
load_project <- function(project_id, dataset){
  # Paths are relative to the notebook's working directory.
  project_dataset_report_dir <- file.path("../../../data/reports",
                                          project_id, dataset)
  report_file <- file.path(project_dataset_report_dir, "report.h5")
  datasets_file <- file.path(project_dataset_report_dir,
                             paste(dataset, "dataset.h5", sep = "_"))
  report <- read_report(report_file, paste(project_id, "Report", dataset))
  datasets <- read_dataset(datasets_file,
                           paste(project_id, "Dataset", dataset))
  list(id = project_id, report = report, datasets = datasets)
}

Create a project object by loading an existing report

p = load_project("P0000001", "proteomics")
Warning message in matrix(unlist(dataframe_str), nrow = length(dataframe_str), byrow = T):
“data length [56749] is not a sub-multiple or multiple of the number of rows [17298]”
Warning message in matrix(unlist(dataframe_str), nrow = length(dataframe_str), byrow = T):
“data length [942] is not a sub-multiple or multiple of the number of rows [188]”
Warning message in matrix(unlist(dataframe_str), nrow = length(dataframe_str), byrow = T):
“data length [140] is not a sub-multiple or multiple of the number of rows [8]”
Warning message in matrix(unlist(dataframe_str), nrow = length(dataframe_str), byrow = T):
“data length [152735] is not a sub-multiple or multiple of the number of rows [26135]”
Warning message in matrix(unlist(dataframe_str), nrow = length(dataframe_str), byrow = T):
“data length [152735] is not a sub-multiple or multiple of the number of rows [26135]”
Warning message in matrix(unlist(dataframe_str), nrow = length(dataframe_str), byrow = T):
“data length [19912] is not a sub-multiple or multiple of the number of rows [6187]”

Visualize the list of plots contained in the report

  1. '0~proteomics_pipeline~cytoscape_network_0_figure'
  2. '11~stratification_description~description_0_figure'
  3. '12~stratification_pca~pca_0_figure'
  4. '13~regulation_description~description_0_figure'
  5. '15~regulation_samr~volcanoplot_0_figure'
  6. '15~regulation_samr~volcanoplot_1_figure'
  7. '15~regulation_samr~volcanoplot_2_figure'
  8. '15~regulation_samr~volcanoplot_3_figure'
  9. '15~regulation_samr~volcanoplot_4_figure'
  10. '15~regulation_samr~volcanoplot_5_figure'
  11. '15~regulation_samr~volcanoplot_6_figure'
  12. '15~regulation_samr~volcanoplot_7_figure'
  13. '15~regulation_samr~volcanoplot_8_figure'
  14. '15~regulation_samr~volcanoplot_9_figure'
  15. '22~literature_associations_publications_abstracts~wordcloud_0_figure'
  16. '2~peptides~barplot_0_figure'
  17. '4~proteins~barplot_0_figure'
  18. '6~modifications~facetplot_0_figure'
  19. '8~coefficient_variation_coefficient_of_variation~scatterplot_matrix_0_figure'
  20. '9~ranking_ranking_with_markers~ranking_0_figure'

Access a specific plot and use plotly to visualize it


Networks have to be converted from JSON format to an edge list. We use R's networkD3 package to create a D3 JavaScript force-directed network graph from it.

Warning message:
“Column `source`/`label` joining factors with different levels, coercing to character vector”
Warning message:
“Column `target`/`label` joining factors with different levels, coercing to character vector”

We can also easily access the different datasets from the project

  1. 'complex_associations'
  2. 'correlation_correlation'
  3. 'disease_associations'
  4. 'drug_associations'
  5. 'go annotation'
  6. 'go_enrichment_Biological_processes_regulation_enrichment'
  7. 'interaction_network'
  8. 'literature_associations_publications_abstracts'
  9. 'number of modified proteins'
  10. 'number of peptides'
  11. 'number of proteins'
  12. 'original'
  13. 'overview statistics_summary'
  14. 'pathway annotation'
  15. 'pathway_enrichment_Pathways_regulation_enrichment'
  16. 'processed'
  17. 'protein biomarkers'
  18. 'regulated'
  19. 'regulation table'
  20. 'Data Matrix Shape'
  21. 'Stats'
A data.frame: 74922 × 6
A30~A2MYE2 A2M~P01023 0.2892365267 0.0 0.0 TRUE
ABI3BP~Q7Z7G0 A30~A2MYE2 -0.31295426430.0 0.0 TRUE
ACE~P12821 A2M~P01023 0.2904851136 0.000590520.00164552TRUE
ACE~P12821 ABI3BP~Q7Z7G00.0026333708 3e-08 2e-07 TRUE
ACTB~P60709 A2M~P01023 -0.15032529931.4e-07 8.5e-07 TRUE
ACTB~P60709 ABI3BP~Q7Z7G0-0.12135720780.009595610.01965832TRUE
ACTB~P60709 ACE~P12821 -0.01205690510.001728830.00428982TRUE
ACTN1~P12814 A30~A2MYE2 -0.09531858744.137e-05 0.00014992TRUE
ACTN1~P12814 ABI3BP~Q7Z7G00.2159122385 0.0 0.0 TRUE
ACTN1~P12814 ACE~P12821 0.0337919091 0.001460440.00369332TRUE
ACTN1~P12814 ACTB~P60709 0.2556916718 2.296e-05 8.778e-05 TRUE
ADA2~Q9NZK5 A2M~P01023 0.2405633762 0.0 1e-08 TRUE
ADA2~Q9NZK5 ABI3BP~Q7Z7G0-0.13787339980.0068122 0.01452743TRUE
ADA2~Q9NZK5 ACTB~P60709 0.0488541846 0.002289050.00551185TRUE
ADA2~Q9NZK5 ACTN1~P12814 0.1606489207 9.2e-07 4.71e-06 TRUE
ADAMTS13~Q76LX8A2M~P01023 0.228562439 1.3e-07 7.7e-07 TRUE
ADAMTS13~Q76LX8ACE~P12821 0.1100432326 9.62e-06 3.996e-05 TRUE
ADAMTS13~Q76LX8ACTB~P60709 -0.12984914450.0 0.0 TRUE
ADAMTS13~Q76LX8ADA2~Q9NZK5 0.0882539027 0.0 0.0 TRUE
ADAMTSL4~Q6UY14A2M~P01023 -0.01671110690.0258627 0.0469946 TRUE
ADAMTSL4~Q6UY14ABI3BP~Q7Z7G0-0.02978281792.1e-07 1.21e-06 TRUE
ADAMTSL4~Q6UY14ACE~P12821 0.1867241047 2e-08 1.1e-07 TRUE
ADAMTSL4~Q6UY14ACTB~P60709 0.0422307278 0.0 0.0 TRUE
ADAMTSL4~Q6UY14ACTN1~P12814 0.167608862 0.0 0.0 TRUE
ADAMTSL4~Q6UY14ADA2~Q9NZK5 -0.04561400593.96e-06 1.779e-05 TRUE
ADH4~P08319 A2M~P01023 -0.11359717140.013337680.02626505TRUE
ADH4~P08319 A30~A2MYE2 0.0504134162 0.0 0.0 TRUE
ADH4~P08319 ACE~P12821 0.065413239 0.0 0.0 TRUE
ADH4~P08319 ACTB~P60709 -0.08051200540.0 0.0 TRUE
ADH4~P08319 ACTN1~P12814 0.0217710049 0.0 0.0 TRUE
scFv~Q65ZC9SERPINA5~P05154-0.113307701 1.005e-05 4.157e-05 TRUE
scFv~Q65ZC9SERPINA7~P055430.0029215059 0.006558860.01404733TRUE
scFv~Q65ZC9SERPINF1~P36955-0.03586426980.0 0.0 TRUE
scFv~Q65ZC9SHBG~P04278 0.0597719401 5.92e-06 2.568e-05 TRUE
scFv~Q65ZC9SNCA~P37840 0.0526730489 0.0 0.0 TRUE
scFv~Q65ZC9SOD3~P08294 0.065714844 0.001345670.00343099TRUE
scFv~Q65ZC9SPARCL1~Q14515 0.2050409349 4e-08 2.6e-07 TRUE
scFv~Q65ZC9SPINK5~Q9NQ38 0.0993925633 0.0 0.0 TRUE
scFv~Q65ZC9SPP2~Q13103 0.1951832721 0.000192870.00060287TRUE
scFv~Q65ZC9STMN1~P16949 -0.19164591180.0 0.0 TRUE
scFv~Q65ZC9THBS1~P07996 -0.08333917981.277e-05 5.16e-05 TRUE
scFv~Q65ZC9THBS4~P35443 -0.010593396 0.000431770.00124437TRUE
scFv~Q65ZC9TKT~P29401 -0.244463802 0.001388870.00353092TRUE
scFv~Q65ZC9TNC~A0A024R884 0.2552310889 0.0 0.0 TRUE
scFv~Q65ZC9TNN~Q9UQP3 -0.10756610310.002427620.00580852TRUE
scFv~Q65ZC9TNXB~P22105 0.2817641092 3.13e-05 0.00011634TRUE
scFv~Q65ZC9TPI1~P60174 -0.22982250460.0 1e-08 TRUE
scFv~Q65ZC9V1-13~Q5NV69 0.1054214299 0.003063980.00714884TRUE
scFv~Q65ZC9V2-13~Q5NV73 0.4771154135 2e-08 1.3e-07 TRUE
scFv~Q65ZC9V4-2~Q5NV82 0.2165834919 1.139e-05 4.654e-05 TRUE
scFv~Q65ZC9V5-2~A2MYC8 0.1808639979 0.0 0.0 TRUE
scFv~Q65ZC9V5-4~Q5NV79 -0.08533651766.394e-05 0.0002221 TRUE
scFv~Q65ZC9VASN~Q6EMK4 0.1403713766 4e-08 2.9e-07 TRUE
scFv~Q65ZC9VCAM1~P19320 -0.05427300410.009159230.01886712TRUE
scFv~Q65ZC9VH6DJ~A2N0T4 0.2625776727 8e-08 5e-07 TRUE
scFv~Q65ZC9VIM~P08670 0.0453630504 0.000129940.00042167TRUE
scFv~Q65ZC9VK3~A2N2F4 0.1745551542 0.022515280.04162447TRUE
scFv~Q65ZC9VNN1~O95497 -0.23294967120.011744170.02348694TRUE
scFv~Q65ZC9YWHAZ~P63104 0.2255080844 2.1e-07 1.21e-06 TRUE
[ ]: