feat(refactor): comment out python vs R loading options in `Asses…

…sing...-workshop.RMD`
alan-turing-institute · Dec 6, 2023 · 4ddee59 · 4ddee59
1 parent 0de11c3
commit 4ddee59
Showing 1 changed file with 77 additions and 68 deletions.
diff --git a/notebooks/Assessing_bc_data/Assessing_BC_Data-workshop.RMD b/notebooks/Assessing_bc_data/Assessing_BC_Data-workshop.RMD
@@ -20,13 +20,16 @@ rm(list=ls())
 
 knitr::opts_knit$set(root.dir="/mnt/vmfileshare/ClimateData/")
 
+# Add in the automatic installation process
 
 library(ggplot2)
 library(terra)
 library(tmap) #pretty maps
 library(RColorBrewer)
 library(tidyverse)
 library(kableExtra)
+install.package("ncdf4")
+library(ncdf4)
 
 if (!require(devtools)) install.packages("devtools")
 library(devtools)
@@ -70,73 +73,79 @@ city <- "Glasgow"
 var <- "tasmax" 
 runs <- c("05", "06", "07", "08")
 
-if(input=="raster"){
 
 ####### PYTHON INPUTS HERE ######
-  # This script uses both raster data and the raw data
-  # This script uses Lists to group everything by runs
-  # Therefore what is require from this here is to create a list object for each of the sets of the data as listed above, where the list items are the rasters or dataframes by run (ie each level of the list is a run)
-  # .nc and .tif files can be read with rast("path/to/file.nc")
-  # Conversion to df is just  as.data.frame(raster, xy=T) - easiest thing is just to loop using lapply the files 
-  #dfs are assumed to be cells x time 
-
-} else if (input=="RDS"){
-  ### This R bit is a bit crazy because of the format output from the bias correction - at some point to be cleaned up and moved to a different script. 
-  ## Load a source raster to extract the crs
-  r <- list.files(paste0(dd, "Reprojected_infill/UKCP2.2/tasmax/05/latest/"), full.names = T)[1]
-  rast <- rast(r)
-  crs <- crs(rast)
-  
-  ## The output created from the R bias correction framework is a list of dataframes containing all the data we need for this doc (although some are transposed).
-  rd <- "Debiased/R/QuantileMapping/three.cities/" 
-  
-  files <- list.files(paste0(dd,rd,city),full.names=T)
-  files.v <- files[grepl(var, files)]
-  
-  allruns <- lapply(files.v, readRDS)
-  
-  names <- gsub(paste0(dd,rd,city,"|/|.RDS"),"",files.v)
-  names(allruns) <- names
-  
-  #This was returned for ease where multiple runs have been looped to apply this paritcular function, but actually we don't need a cope for each nor this data in a list. Therefore: 
-  obs.cal.df <- as.data.frame(t(allruns[[1]]$t.obs))
-  
-  obs.val.df <- allruns[[1]]$val.df[c(1:3601)] #To run until 30th Nov 2020
-  
-  cpm.cal.raw.df.L <- lapply(allruns, function(L){
-    as.data.frame(t(L[["t.cal"]]))
-    }) 
-  
-  #In the R scirpt, the validation is corrected with the projected data as well - so needs to be seperated out (and transposed)
-  cpm.val.raw.df.L <- lapply(allruns, function(L){
-    proj <- as.data.frame(t(L[["t.proj"]]))
-    val.end.date <- min(grep("20201201-", names(proj)))-1
-    cpm.val.raw.df <- proj[,1:val.end.date] 
-  })
-  
-  cpm.proj.raw.df.L <- lapply(allruns, function(L){
-    proj <- as.data.frame(t(L[["t.proj"]]))
-    val.end.date <- min(grep("20201201-", names(proj)))
-    cpm.val.raw.df <- proj[,val.end.date:ncol(proj)] 
-  })
-  
-  cpm.cal.adj.df.L <- lapply(allruns, function(L){
-    adj <- as.data.frame(t(L[["qm1.hist"]]))
-  })
-  
-    cpm.val.adj.df.L <- lapply(allruns, function(L){
-    proj <- as.data.frame(t(L[["qm1.val.proj"]]))
-    val.end.date <- min(grep("20201201-", names(proj)))-1
-    proj[,1:val.end.date] 
-  })
-  
-      cpm.proj.adj.df.L <- lapply(allruns, function(L){
-    proj <- as.data.frame(t(L[["qm1.val.proj"]]))
-    val.end.date <- min(grep("20201201-", names(proj)))
-    proj[,val.end.date:ncol(proj)] 
-  })
-  
-  ## Convert to rasters --requires creation of x and y cols from row names
+# if(input=="raster"){
+# This script uses both raster data and the raw data
+# This script uses Lists to group everything by runs
+# Therefore what is require from this here is to create a list object for each of the sets of the data as listed above, where the list items are the rasters or dataframes by run (ie each level of the list is a run)
+# .nc and .tif files can be read with rast("path/to/file.nc")
+# Conversion to df is just  as.data.frame(raster, xy=T) - easiest thing is just to loop using lapply the files 
+#dfs are assumed to be cells x time 
+
+rd.python <- "Debiased/three.cities.cropped" 
+dd.python <- "/mnt/vmfileshare/ClimateData" #Data directory of all data used in this script
+#ncfname <- paste(dd, rd, city, runs[1], var, ".nc", sep="")
+r.python <- list.files(paste(dd.python, rd.python, city, runs[2], var, sep="/"), pattern="debiased*", full.names = T)
+ncin <- nc_open(r[1])
+
+# } else if (input=="RDS"){
+### This R bit is a bit crazy because of the format output from the bias correction - at some point to be cleaned up and moved to a different script. 
+## Load a source raster to extract the crs
+r <- list.files(paste0(dd, "Reprojected_infill/UKCP2.2/tasmax/05/latest/"), full.names = T)[1]
+rast <- rast(r)
+crs <- crs(rast)
+
+## The output created from the R bias correction framework is a list of dataframes containing all the data we need for this doc (although some are transposed).
+rd <- "Debiased/R/QuantileMapping/three.cities/" 
+
+files <- list.files(paste0(dd,rd,city),full.names=T)
+files.v <- files[grepl(var, files)]
+
+allruns <- lapply(files.v, readRDS)
+
+names <- gsub(paste0(dd,rd,city,"|/|.RDS"),"",files.v)
+names(allruns) <- names
+
+#This was returned for ease where multiple runs have been looped to apply this paritcular function, but actually we don't need a cope for each nor this data in a list. Therefore: 
+obs.cal.df <- as.data.frame(t(allruns[[1]]$t.obs))
+
+obs.val.df <- allruns[[1]]$val.df[c(1:3601)] #To run until 30th Nov 2020
+
+cpm.cal.raw.df.L <- lapply(allruns, function(L){
+  as.data.frame(t(L[["t.cal"]]))
+  }) 
+
+#In the R scirpt, the validation is corrected with the projected data as well - so needs to be seperated out (and transposed)
+cpm.val.raw.df.L <- lapply(allruns, function(L){
+  proj <- as.data.frame(t(L[["t.proj"]]))
+  val.end.date <- min(grep("20201201-", names(proj)))-1
+  cpm.val.raw.df <- proj[,1:val.end.date] 
+})
+
+cpm.proj.raw.df.L <- lapply(allruns, function(L){
+  proj <- as.data.frame(t(L[["t.proj"]]))
+  val.end.date <- min(grep("20201201-", names(proj)))
+  cpm.val.raw.df <- proj[,val.end.date:ncol(proj)] 
+})
+
+cpm.cal.adj.df.L <- lapply(allruns, function(L){
+  adj <- as.data.frame(t(L[["qm1.hist"]]))
+})
+
+  cpm.val.adj.df.L <- lapply(allruns, function(L){
+  proj <- as.data.frame(t(L[["qm1.val.proj"]]))
+  val.end.date <- min(grep("20201201-", names(proj)))-1
+  proj[,1:val.end.date] 
+})
+
+    cpm.proj.adj.df.L <- lapply(allruns, function(L){
+  proj <- as.data.frame(t(L[["qm1.val.proj"]]))
+  val.end.date <- min(grep("20201201-", names(proj)))
+  proj[,val.end.date:ncol(proj)] 
+})
+
+## Convert to rasters --requires creation of x and y cols from row names
 ## For the comparison, just converting the observation and cpm for the cal and val perios (ie not the projection datasets)      
       
 obsrastL <- lapply(list(obs.cal.df, obs.val.df), function(i){
@@ -182,9 +191,9 @@ remove(list2rast)
 
 gc()
 
- } else {
-  print("Invalid input")
-}
+# } else {
+#  print("Invalid input")
+#}