Skip to content

Commit

Permalink
feat(refactor): comment out python vs R loading options in `Asses…
Browse files Browse the repository at this point in the history
…sing...-workshop.RMD`
  • Loading branch information
spool committed Dec 6, 2023
1 parent 0de11c3 commit 4ddee59
Showing 1 changed file with 77 additions and 68 deletions.
145 changes: 77 additions & 68 deletions notebooks/Assessing_bc_data/Assessing_BC_Data-workshop.RMD
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,16 @@ rm(list=ls())
knitr::opts_knit$set(root.dir="/mnt/vmfileshare/ClimateData/")
# Add in the automatic installation process
library(ggplot2)
library(terra)
library(tmap) #pretty maps
library(RColorBrewer)
library(tidyverse)
library(kableExtra)
install.package("ncdf4")
library(ncdf4)
if (!require(devtools)) install.packages("devtools")
library(devtools)
Expand Down Expand Up @@ -70,73 +73,79 @@ city <- "Glasgow"
var <- "tasmax"
runs <- c("05", "06", "07", "08")
if(input=="raster"){
####### PYTHON INPUTS HERE ######
# This script uses both raster data and the raw data
# This script uses Lists to group everything by runs
# Therefore what is require from this here is to create a list object for each of the sets of the data as listed above, where the list items are the rasters or dataframes by run (ie each level of the list is a run)
# .nc and .tif files can be read with rast("path/to/file.nc")
# Conversion to df is just as.data.frame(raster, xy=T) - easiest thing is just to loop using lapply the files
#dfs are assumed to be cells x time
} else if (input=="RDS"){
### This R bit is a bit crazy because of the format output from the bias correction - at some point to be cleaned up and moved to a different script.
## Load a source raster to extract the crs
r <- list.files(paste0(dd, "Reprojected_infill/UKCP2.2/tasmax/05/latest/"), full.names = T)[1]
rast <- rast(r)
crs <- crs(rast)
## The output created from the R bias correction framework is a list of dataframes containing all the data we need for this doc (although some are transposed).
rd <- "Debiased/R/QuantileMapping/three.cities/"
files <- list.files(paste0(dd,rd,city),full.names=T)
files.v <- files[grepl(var, files)]
allruns <- lapply(files.v, readRDS)
names <- gsub(paste0(dd,rd,city,"|/|.RDS"),"",files.v)
names(allruns) <- names
#This was returned for ease where multiple runs have been looped to apply this paritcular function, but actually we don't need a cope for each nor this data in a list. Therefore:
obs.cal.df <- as.data.frame(t(allruns[[1]]$t.obs))
obs.val.df <- allruns[[1]]$val.df[c(1:3601)] #To run until 30th Nov 2020
cpm.cal.raw.df.L <- lapply(allruns, function(L){
as.data.frame(t(L[["t.cal"]]))
})
#In the R scirpt, the validation is corrected with the projected data as well - so needs to be seperated out (and transposed)
cpm.val.raw.df.L <- lapply(allruns, function(L){
proj <- as.data.frame(t(L[["t.proj"]]))
val.end.date <- min(grep("20201201-", names(proj)))-1
cpm.val.raw.df <- proj[,1:val.end.date]
})
cpm.proj.raw.df.L <- lapply(allruns, function(L){
proj <- as.data.frame(t(L[["t.proj"]]))
val.end.date <- min(grep("20201201-", names(proj)))
cpm.val.raw.df <- proj[,val.end.date:ncol(proj)]
})
cpm.cal.adj.df.L <- lapply(allruns, function(L){
adj <- as.data.frame(t(L[["qm1.hist"]]))
})
cpm.val.adj.df.L <- lapply(allruns, function(L){
proj <- as.data.frame(t(L[["qm1.val.proj"]]))
val.end.date <- min(grep("20201201-", names(proj)))-1
proj[,1:val.end.date]
})
cpm.proj.adj.df.L <- lapply(allruns, function(L){
proj <- as.data.frame(t(L[["qm1.val.proj"]]))
val.end.date <- min(grep("20201201-", names(proj)))
proj[,val.end.date:ncol(proj)]
})
## Convert to rasters --requires creation of x and y cols from row names
# if(input=="raster"){
# This script uses both raster data and the raw data
# This script uses Lists to group everything by runs
# Therefore what is require from this here is to create a list object for each of the sets of the data as listed above, where the list items are the rasters or dataframes by run (ie each level of the list is a run)
# .nc and .tif files can be read with rast("path/to/file.nc")
# Conversion to df is just as.data.frame(raster, xy=T) - easiest thing is just to loop using lapply the files
#dfs are assumed to be cells x time
rd.python <- "Debiased/three.cities.cropped"
dd.python <- "/mnt/vmfileshare/ClimateData" #Data directory of all data used in this script
#ncfname <- paste(dd, rd, city, runs[1], var, ".nc", sep="")
r.python <- list.files(paste(dd.python, rd.python, city, runs[2], var, sep="/"), pattern="debiased*", full.names = T)
ncin <- nc_open(r[1])
# } else if (input=="RDS"){
### This R bit is a bit crazy because of the format output from the bias correction - at some point to be cleaned up and moved to a different script.
## Load a source raster to extract the crs
r <- list.files(paste0(dd, "Reprojected_infill/UKCP2.2/tasmax/05/latest/"), full.names = T)[1]
rast <- rast(r)
crs <- crs(rast)
## The output created from the R bias correction framework is a list of dataframes containing all the data we need for this doc (although some are transposed).
rd <- "Debiased/R/QuantileMapping/three.cities/"
files <- list.files(paste0(dd,rd,city),full.names=T)
files.v <- files[grepl(var, files)]
allruns <- lapply(files.v, readRDS)
names <- gsub(paste0(dd,rd,city,"|/|.RDS"),"",files.v)
names(allruns) <- names
#This was returned for ease where multiple runs have been looped to apply this paritcular function, but actually we don't need a cope for each nor this data in a list. Therefore:
obs.cal.df <- as.data.frame(t(allruns[[1]]$t.obs))
obs.val.df <- allruns[[1]]$val.df[c(1:3601)] #To run until 30th Nov 2020
cpm.cal.raw.df.L <- lapply(allruns, function(L){
as.data.frame(t(L[["t.cal"]]))
})
#In the R scirpt, the validation is corrected with the projected data as well - so needs to be seperated out (and transposed)
cpm.val.raw.df.L <- lapply(allruns, function(L){
proj <- as.data.frame(t(L[["t.proj"]]))
val.end.date <- min(grep("20201201-", names(proj)))-1
cpm.val.raw.df <- proj[,1:val.end.date]
})
cpm.proj.raw.df.L <- lapply(allruns, function(L){
proj <- as.data.frame(t(L[["t.proj"]]))
val.end.date <- min(grep("20201201-", names(proj)))
cpm.val.raw.df <- proj[,val.end.date:ncol(proj)]
})
cpm.cal.adj.df.L <- lapply(allruns, function(L){
adj <- as.data.frame(t(L[["qm1.hist"]]))
})
cpm.val.adj.df.L <- lapply(allruns, function(L){
proj <- as.data.frame(t(L[["qm1.val.proj"]]))
val.end.date <- min(grep("20201201-", names(proj)))-1
proj[,1:val.end.date]
})
cpm.proj.adj.df.L <- lapply(allruns, function(L){
proj <- as.data.frame(t(L[["qm1.val.proj"]]))
val.end.date <- min(grep("20201201-", names(proj)))
proj[,val.end.date:ncol(proj)]
})
## Convert to rasters --requires creation of x and y cols from row names
## For the comparison, just converting the observation and cpm for the cal and val perios (ie not the projection datasets)
obsrastL <- lapply(list(obs.cal.df, obs.val.df), function(i){
Expand Down Expand Up @@ -182,9 +191,9 @@ remove(list2rast)
gc()
} else {
print("Invalid input")
}
# } else {
# print("Invalid input")
#}
Expand Down

0 comments on commit 4ddee59

Please sign in to comment.