From 6049fcaacf51ce4846bd2a057082263eb6a560ed Mon Sep 17 00:00:00 2001 From: Dr Griffith Rees Date: Wed, 22 Nov 2023 16:33:45 +0000 Subject: [PATCH] fix(ci): add `pre-commit` to check `R/` path and apply --- .github/workflows/ci.yaml | 4 +- .pre-commit-config.yaml | 2 +- R/LCAT/Data_Processing_todf.R | 51 +- R/LCAT/LCATv_apply_qmapQuant_to_crpd_df_fn.R | 464 +++++++++--------- R/README.md | 8 +- R/Resampling/Resampling.HADs.inR.R | 22 +- R/bias-correction-methods/ThreeCitiesQmap.RMD | 20 +- R/bias-correction-methods/WIP_EQM.RMD | 35 +- .../apply_qmapQuant_to_crpd_df_fn.R | 43 +- .../WIP Comparing HADs grids.Rmd | 35 +- .../WIP-Comparing-HADs-grids.Rmd | 35 +- .../WIP-Comparing-HADs-grids.md | 14 +- .../converting_city_crops_to_df.R | 118 ++--- README.md | 2 +- 14 files changed, 420 insertions(+), 433 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 8ca3d40a..38e4d4e9 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -14,11 +14,11 @@ env: on: pull_request: - branches: ['main', 'docker-config'] + branches: ['main', 'ruth-notebook-for-workshop'] paths-ignore: ['docs/**'] push: - branches: ['main', 'docker-config'] + branches: ['main', 'ruth-notebook-for-workshop'] paths-ignore: ['docs/**'] concurrency: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bd658c3a..4ac55731 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,4 @@ -exclude: "R|data" +exclude: "data" repos: - repo: https://github.com/psf/black rev: "23.9.1" diff --git a/R/LCAT/Data_Processing_todf.R b/R/LCAT/Data_Processing_todf.R index 89d15d15..fdea70a4 100644 --- a/R/LCAT/Data_Processing_todf.R +++ b/R/LCAT/Data_Processing_todf.R @@ -1,12 +1,12 @@ -## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## +## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## Data Processing UK HadsGrid CPM data from raster to data.frame ## 0. 
About -# Many of the methods packages for applying bias correction to climate date take as input vector, matrix or data.frame, +# Many of the methods packages for applying bias correction to climate date take as input vector, matrix or data.frame, # rather than a spatial file or raster -# This may not be the most effective way of analysising this data +# This may not be the most effective way of analysising this data rm(list=ls()) @@ -47,16 +47,16 @@ cores <- detectCores() cl <- makeCluster(cores[1]-1) registerDoParallel(cl) - foreach(x = x, + foreach(x = x, .packages = c("terra", "tidyverse"), - .errorhandling = 'pass') %dopar% { + .errorhandling = 'pass') %dopar% { f <- paste0(dd,'shapefiles/NUTS_Level_1_January_2018_FCB_in_the_United_Kingdom_2022_7279368953270783580/NUTS_Level_1_January_2018_FCB_in_the_United_Kingdom.shp') UK.shape <-vect(f) crop.area <- UK.shape[which(UK.shape$nuts118cd==x)] - + var <- c("rainfall", "tasmax", "tasmin", "tas") - + hads19802010_read_crop_df_write(var = var, fp = paste0(dd, "Processed/HadsUKgrid/resampled_2.2km/"), name1 = "HadsUK", @@ -64,45 +64,42 @@ registerDoParallel(cl) crop.area=crop.area, cropname=x, rd=rd) - + } - - stopCluster(cl) + + stopCluster(cl) gc() ### Processing CPM - + x <- regioncd - + cores <- detectCores() cl <- makeCluster(cores[1]-1) registerDoParallel(cl) - - foreach(x = x, + + foreach(x = x, .packages = c("terra", "tidyverse"), - .errorhandling = 'pass') %dopar% { - + .errorhandling = 'pass') %dopar% { + f <- paste0(dd,'shapefiles/NUTS_Level_1_January_2018_FCB_in_the_United_Kingdom_2022_7279368953270783580/NUTS_Level_1_January_2018_FCB_in_the_United_Kingdom.shp') UK.shape <-vect(f) crop.area <- UK.shape[which(UK.shape$nuts118cd==x)] - + runs <- c("05", "07", "08", "06") var <- c("tasmax", "tasmin","pr", "tas") rd <- paste0(dd, "Interim/CPM/Data_as_df") - + cpm_read_crop_df_write(runs=runs, var=var, fp=paste0(dd, "Reprojected/UKCP2.2/"), - year1=2060, year2=2080, #Ran sep for each year segment - name1="CPM", crop = T, + year1=2060, year2=2080, #Ran sep for each year segment + name1="CPM", crop = T, crop.area = crop.area, cropname = x, rd=paste0(dd, "Interim/CPM/Data_as_df")) - - + + } - - stopCluster(cl) + + stopCluster(cl) gc() - - - diff --git a/R/LCAT/LCATv_apply_qmapQuant_to_crpd_df_fn.R b/R/LCAT/LCATv_apply_qmapQuant_to_crpd_df_fn.R index 7ab0cac2..639aa7be 100644 --- a/R/LCAT/LCATv_apply_qmapQuant_to_crpd_df_fn.R +++ b/R/LCAT/LCATv_apply_qmapQuant_to_crpd_df_fn.R @@ -1,5 +1,5 @@ #This version of this file was used to process LCAT files. 
Later version of this files will be renamed and split -##Loading data as created in 'Data_Processing_todf.R' +##Loading data as created in 'Data_Processing_todf.R' #Requires library(tidyverse) @@ -11,7 +11,7 @@ apply_bias_correction_to_cropped_df <- function(region, #Region code - needs to var, #Meterological variables Runs){ - i <- region + i <- region for(r in Runs){ for(v in var){ @@ -26,8 +26,8 @@ for(r in Runs){ #subset file list to var obs.var <- obs[grepl(v,obs)] - - #subset to calibration years + + #subset to calibration years obs.varc <- obs.var[grepl("1980", obs.var)] obs.df <- fread(paste0(fp, obs.varc)) obs.df <- as.data.frame(obs.df) @@ -36,12 +36,12 @@ for(r in Runs){ obs.df$x <- NULL obs.df$y <- NULL - #Remove the dates not in the cpm + #Remove the dates not in the cpm ## find col position of the first cpm date 19801201 n1 <-min(grep("19801201", names(obs.df))) obs.df <- obs.df[c(n1:ncol(obs.df))] - - + + #Using 1980 - 2010 as calibration period fp <- paste0(dd, "Interim/CPM/Data_as_df/") cpm.files <- list.files(fp) @@ -49,44 +49,44 @@ for(r in Runs){ #Calibration years 1980 - 2010 - load in full one for 1980 - 2000 cpm.cal <- cpm.files[grepl("1980|2000", cpm.files)] - #Subset file list to area + #Subset file list to area cpm.cal <- cpm.cal[grepl(i, cpm.cal)] #subset to var and run cpm.cal.var <- cpm.cal[grepl(v, cpm.cal)&grepl(r, cpm.cal)] - #Load in + #Load in cal.df <- lapply(cpm.cal.var, function(x){ df <- fread(paste0(fp, x)) df <- as.data.frame(df) - + row.names(df)<- paste0(df$x, "_", df$y) df$x <- NULL df$y <- NULL return(df) }) - + cal.df <- cal.df %>% reduce(cbind) - #Sub out beyond cal period (2010 - 2020) - ie just keep the calibration here - #Keep all of the files with these years - because the naming convention runs + #Sub out beyond cal period (2010 - 2020) - ie just keep the calibration here + #Keep all of the files with these years - because the naming convention runs #from Nov to following year we need to just take the first 30 days of the one starting with 20091201- n2 <- min(grep("20091201-",names(cal.df))) + 29 - - #This is the first part of the validation dataset, but all the val will be added to the projection df for + + #This is the first part of the validation dataset, but all the val will be added to the projection df for #the sake of bias correction and assessed separately proj.df1 <- cal.df[c((n2+1):ncol(cal.df))] cal.df <- cal.df[c(1:n2)] - - gc() - + + gc() + yi <- paste0(i,c(2020,2040,2060), collapse="|") cpm.proj <- cpm.files[grepl(yi, cpm.files)] #Subset to Area, var and run cpm.proj <- cpm.proj[grepl(i, cpm.proj)&grepl(v, cpm.proj)&grepl(r, cpm.proj)] - #Load in + #Load in proj.df2 <- lapply(cpm.proj, function(x){ df <- as.data.frame(fread(paste0(fp, x))) #Remove x and y cols @@ -96,32 +96,32 @@ for(r in Runs){ names(proj.df2) <- cpm.proj proj.df <- c(list(proj.df1), proj.df2) %>% reduce(cbind) - + remove("proj.df1") remove("proj.df2") ## **2. 
Wrangle the data** - + #missing.in.hads.cpm.cal <- cal.df[-which(row.names(cal.df)%in%row.names(obs.df)),] #missing.in.hads.cpm.proj <- proj.df[-which(row.names(proj.df)%in%row.names(obs.df)),] - + cal.df <- cal.df[which(row.names(cal.df)%in%row.names(obs.df)),] proj.df <- proj.df[which(row.names(proj.df)%in%row.names(obs.df)),] - - #save the missing outputs + + #save the missing outputs p <- paste0("checkpoint1", v, "_", i, "_", r, "_") print(p) #write.csv(missing.in.hads.cpm.cal, paste0(dd, "Debiased/R/QuantileMapping/missing.in.hads/",r,"_",i,"_",v, ".csv")) - + ### Update obs data to 360 days - #The below is a work around with the HADS dataset having 365 days on leap years - this is to be updateed and corrected when the 360 day sampling is better sorted - + #The below is a work around with the HADS dataset having 365 days on leap years - this is to be updateed and corrected when the 360 day sampling is better sorted + #Convert obs to 360 day year - has 40 more vars so remove the ones not in cal remove <- c("0229_29", "0430_30", "0731_31", "0930_30", "1130_30") remove <- paste0(remove, collapse = "|") - + obs.df <- obs.df[,!grepl(remove, names(obs.df))] #This still pulls in the 31st Dec 2009 for some reason is in the hads so manual remove obs.df <- obs.df[1:ncol(cal.df)] @@ -146,8 +146,8 @@ for(r in Runs){ library(qmap) qm1.fit <- fitQmapQUANT(obs.df, cal.df, wet.day = FALSE, - qstep = 0.01, - nboot = 1) #nboot number of bootstrap samples used for estimation of the observed quantiles. + qstep = 0.01, + nboot = 1) #nboot number of bootstrap samples used for estimation of the observed quantiles. qm1.hist.a <- doQmapQUANT(cal.df, qm1.fit, type="linear") @@ -170,232 +170,232 @@ for(r in Runs){ p <- paste0("checkpoint5", v, "_", i, "_", r, "_") print(p) rm(list=setdiff(ls(), c("v", "i", "r", "var", "Runs"))) - + gc(reset=TRUE) - + } else { - -#### Precipitation - the HADs variable has is called 'rainfall' + +#### Precipitation - the HADs variable has is called 'rainfall' dd <- "/mnt/vmfileshare/ClimateData/" #Subset to Area #HADs grid observational data fp <- paste0(dd, "Interim/HadsUK/Data_as_df/") files <- list.files(fp) obs <- files[grepl(i, files)] - + #subset file list to var obs.var <- obs[grepl("rainfall",obs)] - - #subset to calibration years + + #subset to calibration years obs.varc <- obs.var[grepl("1980", obs.var)] obs.df <- fread(paste0(fp, obs.varc)) obs.df <- as.data.frame(obs.df) - + row.names(obs.df) <- paste0(obs.df$x, "_", obs.df$y ) obs.df$x <- NULL obs.df$y <- NULL - - #Remove the dates not in the cpm + + #Remove the dates not in the cpm ## find col position of the first cpm date 19801201 n1 <-min(grep("19801201", names(obs.df))) obs.df <- obs.df[c(n1:ncol(obs.df))] - - + + #Using 1980 - 2010 as calibration period fp <- paste0(dd, "Interim/CPM/Data_as_df/") cpm.files <- list.files(fp) - + #Calibration years 1980 - 2010 - load in full one for 1980 - 2000 cpm.cal <- cpm.files[grepl("1980|2000", cpm.files)] - - #Subset file list to area + + #Subset file list to area cpm.cal <- cpm.cal[grepl(i, cpm.cal)] - + #subset to var and run cpm.cal.var <- cpm.cal[grepl(v, cpm.cal)&grepl(r, cpm.cal)] - - #Load in + + #Load in cal.df <- lapply(cpm.cal.var, function(x){ df <- fread(paste0(fp, x)) df <- as.data.frame(df) - + row.names(df)<- paste0(df$x, "_", df$y) df$x <- NULL df$y <- NULL return(df) }) - + cal.df <- cal.df %>% reduce(cbind) - - #Sub out beyond cal period (2010 - 2020) - ie just keep the calibration here - #Keep all of the files with these years - because the naming 
convention runs + + #Sub out beyond cal period (2010 - 2020) - ie just keep the calibration here + #Keep all of the files with these years - because the naming convention runs #from Nov to following year we need to just take the first 30 days of the one starting with 20091201- n2 <- min(grep("20091201-",names(cal.df))) + 29 - - #This is the first part of the validation dataset, but all the val will be added to the projection df for + + #This is the first part of the validation dataset, but all the val will be added to the projection df for #the sake of bias correction and assessed separately proj.df1 <- cal.df[c((n2+1):ncol(cal.df))] cal.df <- cal.df[c(1:n2)] - + gc() - + yi <- paste0(i,c(2020,2040,2060), collapse="|") cpm.proj <- cpm.files[grepl(yi, cpm.files)] - + #Subset to Area, var and run cpm.proj <- cpm.proj[grepl(i, cpm.proj)&grepl(v, cpm.proj)&grepl(r, cpm.proj)] - - #Load in + + #Load in proj.df2 <- lapply(cpm.proj, function(x){ df <- as.data.frame(fread(paste0(fp, x))) #Remove x and y cols df[c(3:ncol(df))] }) - + names(proj.df2) <- cpm.proj - + proj.df <- c(list(proj.df1), proj.df2) %>% reduce(cbind) - + remove("proj.df1") remove("proj.df2") - + ## **2. Wrangle the data** - + #missing.in.hads.cpm.cal <- cal.df[-which(row.names(cal.df)%in%row.names(obs.df)),] #missing.in.hads.cpm.proj <- proj.df[-which(row.names(proj.df)%in%row.names(obs.df)),] - - + + cal.df <- cal.df[which(row.names(cal.df)%in%row.names(obs.df)),] proj.df <- proj.df[which(row.names(proj.df)%in%row.names(obs.df)),] - - #save the missing outputs + + #save the missing outputs p <- paste0("checkpoint1", v, "_", i, "_", r, "_") print(p) #write.csv(missing.in.hads.cpm.cal, paste0(dd, "Debiased/R/QuantileMapping/missing.in.hads/",r,"_",i,"_",v, ".csv")) - + ### Update obs data to 360 days - - #The below is a work around with the HADS dataset having 365 days on leap years - this is to be updateed and corrected when the 360 day sampling is better sorted - + + #The below is a work around with the HADS dataset having 365 days on leap years - this is to be updateed and corrected when the 360 day sampling is better sorted + #Convert obs to 360 day year - has 40 more vars so remove the ones not in cal remove <- c("0229_29", "0430_30", "0731_31", "0930_30", "1130_30") remove <- paste0(remove, collapse = "|") - + obs.df <- obs.df[,!grepl(remove, names(obs.df))] #This still pulls in the 31st Dec 2009 for some reason is in the hads so manual remove obs.df <- obs.df[1:ncol(cal.df)] - + ### Transpose the data sets - + #Obs grid should be cols, observations (time) should be rows for linear scaling - + cal.df <- t(cal.df) proj.df <- t(proj.df) obs.df <- t(obs.df) - + ## **3. Empirical Quantile Mapping** - + #(from qmap vignette) - fitQmapQUANT estimates values of the empirical cumulative distribution function of observed and #modelled time series for regularly spaced quantiles. doQmapQUANT uses these estimates to perform #quantile mapping p <- paste0("checkpoint2", v, "_", i, "_", r, "_") print(p) - - + + qm1.fit <- fitQmapQUANT(obs.df, cal.df, - wet.day = TRUE, #If wet.day=TRUE the empirical probability of nonzero observations is found (obs>=0) and the corresponding modelled value is selected as a threshold. All modelled values below this threshold are set to zero. If wet.day is numeric the same procedure is performed after setting all obs to zero. - qstep = 0.01, - nboot = 1) #nboot number of bootstrap samples used for estimation of the observed quantiles. 
- - + wet.day = TRUE, #If wet.day=TRUE the empirical probability of nonzero observations is found (obs>=0) and the corresponding modelled value is selected as a threshold. All modelled values below this threshold are set to zero. If wet.day is numeric the same procedure is performed after setting all obs to zero. + qstep = 0.01, + nboot = 1) #nboot number of bootstrap samples used for estimation of the observed quantiles. + + qm1.hist.a <- doQmapQUANT(cal.df, qm1.fit, type="linear") qm1.hist.b <- doQmapQUANT(cal.df, qm1.fit, type="tricub") - + qm1.proj.a <- doQmapQUANT(proj.df, qm1.fit, type="linear") qm1.proj.b <- doQmapQUANT(proj.df, qm1.fit, type="tricub") - + ## **4. Save the data** p <- paste0("checkpoint3", v, "_", i, "_", r, "_") print(p) # Save data - lists of dfs for now (will be easier for assessment) results.L <- list(obs.df, cal.df, proj.df, qm1.hist.a, qm1.hist.b, qm1.proj.a, qm1.proj.b) - + names(results.L) <- c("t.obs", "t.cal", "t.proj", "qm1.hist.a", "qm1.hist.b", "qm1.proj.a", "qm1.proj.b") p <- paste0("checkpoint4", v, "_", i, "_", r, "_") print(p) base::saveRDS(results.L, file = paste0(dd, "Debiased/R/QuantileMapping/resultsL", r,"_",i,"_",v, ".RDS")) - + p <- paste0("checkpoint5", v, "_", i, "_", r, "_") print(p) rm(list=setdiff(ls(), c("v", "i", "r", "var", "Runs"))) - + gc(reset=TRUE) - + } } } } -###################### Further cropping to the cropped dfs (!) - mostly for Scotland which is too big! +###################### Further cropping to the cropped dfs (!) - mostly for Scotland which is too big! cropdf_further_apply_bc_to_cropped_df <- function(region, #Region code - needs to relate to the file name in a unique way to subset var, #Meterological variables - Runs, #Runs in form 'Run08'- matched input + Runs, #Runs in form 'Run08'- matched input N.new.segments,...){ #Numeric, Number of dfs to break down to, eg 4 - - i <- region + + i <- region N.new.segments<- N.new.segments Runs <- Runs var <- var - + for(r in Runs){ for(v in var){ for(y in 1:N.new.segments){ if(v!="pr"){ dd <- "/mnt/vmfileshare/ClimateData/" - + #Subset to Area - #Load cpm first and then use this to subset the latter as there are more cells in cpm that hads + #Load cpm first and then use this to subset the latter as there are more cells in cpm that hads #Using 1980 - 2010 as calibration period fp <- paste0(dd, "Interim/CPM/Data_as_df/") cpm.files <- list.files(fp) - + #Calibration years 1980 - 2010 - load in full one for 1980 - 2000 cpm.cal <- cpm.files[grepl("1980|2000", cpm.files)] - - #Subset file list to area + + #Subset file list to area cpm.cal <- cpm.cal[grepl(i, cpm.cal)] - + #subset to var and run cpm.cal.var <- cpm.cal[grepl(v, cpm.cal)&grepl(r, cpm.cal)] - - #Load in + + #Load in cal.df <- lapply(cpm.cal.var, function(x){ df <- fread(paste0(fp, x)) df <- as.data.frame(df) - + row.names(df)<- paste0(df$x, "_", df$y) df$x <- NULL df$y <- NULL return(df) }) - + cal.df <- cal.df %>% reduce(cbind) - - - #Sub out beyond cal period (2010 - 2020) - ie just keep the calibration here - #Keep all of the files with these years - because the naming convention runs + + + #Sub out beyond cal period (2010 - 2020) - ie just keep the calibration here + #Keep all of the files with these years - because the naming convention runs #from Nov to following year we need to just take the first 30 days of the one starting with 20091201- n2 <- min(grep("20091201-",names(cal.df))) + 29 - - #This is the first part of the validation dataset, but all the val will be added to the projection df for + + #This is the 
first part of the validation dataset, but all the val will be added to the projection df for #the sake of bias correction and assessed separately proj.df1 <- cal.df[c((n2+1):ncol(cal.df))] cal.df <- cal.df[c(1:n2)] - + #Subset the dataframe iteratively depending on y nrows.seg <- nrow(cal.df)/N.new.segments y_1 <- y-1 @@ -403,176 +403,176 @@ cropdf_further_apply_bc_to_cropped_df <- function(region, #Region code - needs t nr1 <- round(nrows.seg*y_1) + 1 nr2 <- round(nrows.seg*y) cal.df <- cal.df[nr1:nr2,] - + #proj data yi <- paste0(i,c(2020,2040,2060), collapse="|") cpm.proj <- cpm.files[grepl(yi, cpm.files)] - + #Subset to Area, var and run cpm.proj <- cpm.proj[grepl(i, cpm.proj)&grepl(v, cpm.proj)&grepl(r, cpm.proj)] - - #Load in + + #Load in proj.df2 <- lapply(cpm.proj, function(x){ df <- as.data.frame(fread(paste0(fp, x))) #Remove x and y cols df[c(3:ncol(df))] }) - + names(proj.df2) <- cpm.proj - + proj.df <- c(list(proj.df1), proj.df2) %>% reduce(cbind) proj.df <- proj.df[which(row.names(proj.df)%in%row.names(cal.df)),] - + remove("proj.df1") remove("proj.df2") - - + + #HADs grid observational data fp <- paste0(dd, "Interim/HadsUK/Data_as_df/") files <- list.files(fp) obs <- files[grepl(i, files)] - + #subset file list to var obs.var <- obs[grepl(v,obs)] - - #subset to calibration years + + #subset to calibration years obs.varc <- obs.var[grepl("1980", obs.var)] obs.df <- fread(paste0(fp, obs.varc)) obs.df <- as.data.frame(obs.df) - + row.names(obs.df) <- paste0(obs.df$x, "_", obs.df$y ) obs.df$x <- NULL obs.df$y <- NULL - + #Subset to the rows which are in above (some will be missing) obs.df <- obs.df[which(row.names(obs.df)%in%row.names(cal.df)),] - - #Remove the dates not in the cpm + + #Remove the dates not in the cpm ## find col position of the first cpm date 19801201 n1 <-min(grep("19801201", names(obs.df))) obs.df <- obs.df[c(n1:ncol(obs.df))] - + gc() - - + + ## **2. Wrangle the data** - + #missing.in.hads.cpm.cal <- cal.df[-which(row.names(cal.df)%in%row.names(obs.df)),] #missing.in.hads.cpm.proj <- proj.df[-which(row.names(proj.df)%in%row.names(obs.df)),] - - + + cal.df <- cal.df[which(row.names(cal.df)%in%row.names(obs.df)),] proj.df <- proj.df[which(row.names(proj.df)%in%row.names(obs.df)),] - - #save the missing outputs + + #save the missing outputs p <- paste0("checkpoint1", v, "_", i, "_", r, "_",y) print(p) #write.csv(missing.in.hads.cpm.cal, paste0(dd, "Debiased/R/QuantileMapping/missing.in.hads/",r,"_",i,"_",v, ".csv")) - + ### Update obs data to 360 days - - #The below is a work around with the HADS dataset having 365 days on leap years - this is to be updateed and corrected when the 360 day sampling is better sorted - + + #The below is a work around with the HADS dataset having 365 days on leap years - this is to be updateed and corrected when the 360 day sampling is better sorted + #Convert obs to 360 day year - has 40 more vars so remove the ones not in cal remove <- c("0229_29", "0430_30", "0731_31", "0930_30", "1130_30") remove <- paste0(remove, collapse = "|") - + obs.df <- obs.df[,!grepl(remove, names(obs.df))] #This still pulls in the 31st Dec 2009 for some reason is in the hads so manual remove obs.df <- obs.df[1:ncol(cal.df)] - + ### Transpose the data sets - + #Obs grid should be cols, observations (time) should be rows for linear scaling - + cal.df <- t(cal.df) proj.df <- t(proj.df) obs.df <- t(obs.df) - - + + ## **3. 
Empirical Quantile Mapping** - + #(from qmap vignette) - fitQmapQUANT estimates values of the empirical cumulative distribution function of observed and #modelled time series for regularly spaced quantiles. doQmapQUANT uses these estimates to perform #quantile mapping p <- paste0("checkpoint2", v, "_", i, "_", r, "_",y) print(p) - + library(qmap) qm1.fit <- fitQmapQUANT(obs.df, cal.df, wet.day = FALSE, - qstep = 0.01, - nboot = 1) #nboot number of bootstrap samples used for estimation of the observed quantiles. - - + qstep = 0.01, + nboot = 1) #nboot number of bootstrap samples used for estimation of the observed quantiles. + + qm1.hist.a <- doQmapQUANT(cal.df, qm1.fit, type="linear") qm1.hist.b <- doQmapQUANT(cal.df, qm1.fit, type="tricub") - + qm1.proj.a <- doQmapQUANT(proj.df, qm1.fit, type="linear") qm1.proj.b <- doQmapQUANT(proj.df, qm1.fit, type="tricub") - + ## **4. Save the data** p <- paste0("checkpoint3", v, "_", i, "_", r, "_", y) print(p) # Save data - lists of dfs for now (will be easier for assessment) results.L <- list(obs.df, cal.df, proj.df, qm1.hist.a, qm1.hist.b, qm1.proj.a, qm1.proj.b) - + names(results.L) <- c("t.obs", "t.cal", "t.proj", "qm1.hist.a", "qm1.hist.b", "qm1.proj.a", "qm1.proj.b") p <- paste0("checkpoint4", v, "_", i, "_", r, "_", y) print(p) base::saveRDS(results.L, file = paste0(dd, "Debiased/R/QuantileMapping/resultsL", r,"_",i,"_",y,"_",v, ".RDS")) - + p <- paste0("checkpoint5", v, "_", i, "_", r, "_", y) print(p) rm(list=setdiff(ls(), c("v", "i", "r", "var", "Runs", "y", "N.new.segments"))) - + gc(reset=TRUE) - - + + } else { - - #### Precipitation - the HADs variable has is called 'rainfall' + + #### Precipitation - the HADs variable has is called 'rainfall' dd <- "/mnt/vmfileshare/ClimateData/" - + #Subset to Area - #Load cpm first and then use this to subset the latter as there are more cells in cpm that hads + #Load cpm first and then use this to subset the latter as there are more cells in cpm that hads #Using 1980 - 2010 as calibration period fp <- paste0(dd, "Interim/CPM/Data_as_df/") cpm.files <- list.files(fp) - + #Calibration years 1980 - 2010 - load in full one for 1980 - 2000 cpm.cal <- cpm.files[grepl("1980|2000", cpm.files)] - - #Subset file list to area + + #Subset file list to area cpm.cal <- cpm.cal[grepl(i, cpm.cal)] - + #subset to var and run cpm.cal.var <- cpm.cal[grepl(v, cpm.cal)&grepl(r, cpm.cal)] - - #Load in + + #Load in cal.df <- lapply(cpm.cal.var, function(x){ df <- fread(paste0(fp, x)) df <- as.data.frame(df) - + row.names(df)<- paste0(df$x, "_", df$y) df$x <- NULL df$y <- NULL return(df) }) - + cal.df <- cal.df %>% reduce(cbind) - - - #Sub out beyond cal period (2010 - 2020) - ie just keep the calibration here - #Keep all of the files with these years - because the naming convention runs + + + #Sub out beyond cal period (2010 - 2020) - ie just keep the calibration here + #Keep all of the files with these years - because the naming convention runs #from Nov to following year we need to just take the first 30 days of the one starting with 20091201- n2 <- min(grep("20091201-",names(cal.df))) + 29 - - #This is the first part of the validation dataset, but all the val will be added to the projection df for + + #This is the first part of the validation dataset, but all the val will be added to the projection df for #the sake of bias correction and assessed separately proj.df1 <- cal.df[c((n2+1):ncol(cal.df))] cal.df <- cal.df[c(1:n2)] - + #Subset the dataframe iteratively depending on y nrows.seg <- nrow(cal.df)/N.new.segments 
y_1 <- y-1 @@ -580,138 +580,136 @@ cropdf_further_apply_bc_to_cropped_df <- function(region, #Region code - needs t nr1 <- round(nrows.seg*y_1) + 1 nr2 <- round(nrows.seg*y) cal.df <- cal.df[nr1:nr2,] - - + + #proj data yi <- paste0(i,c(2020,2040,2060), collapse="|") cpm.proj <- cpm.files[grepl(yi, cpm.files)] - + #Subset to Area, var and run cpm.proj <- cpm.proj[grepl(i, cpm.proj)&grepl(v, cpm.proj)&grepl(r, cpm.proj)] - - #Load in + + #Load in proj.df2 <- lapply(cpm.proj, function(x){ df <- as.data.frame(fread(paste0(fp, x))) #Remove x and y cols df[c(3:ncol(df))] }) - + names(proj.df2) <- cpm.proj - + proj.df <- c(list(proj.df1), proj.df2) %>% reduce(cbind) proj.df <- proj.df[which(row.names(proj.df)%in%row.names(cal.df)),] - + remove("proj.df1") remove("proj.df2") - - + + #HADs grid observational data fp <- paste0(dd, "Interim/HadsUK/Data_as_df/") files <- list.files(fp) obs <- files[grepl(i, files)] - + #subset file list to var obs.var <- obs[grepl("rainfall",obs)] - - #subset to calibration years + + #subset to calibration years obs.varc <- obs.var[grepl("1980", obs.var)] obs.df <- fread(paste0(fp, obs.varc)) obs.df <- as.data.frame(obs.df) - + row.names(obs.df) <- paste0(obs.df$x, "_", obs.df$y ) obs.df$x <- NULL obs.df$y <- NULL - + #Subset to the rows which are in above (some will be missing) obs.df <- obs.df[which(row.names(obs.df)%in%row.names(cal.df)),] - - #Remove the dates not in the cpm + + #Remove the dates not in the cpm ## find col position of the first cpm date 19801201 n1 <-min(grep("19801201", names(obs.df))) obs.df <- obs.df[c(n1:ncol(obs.df))] - + gc() - - + + ## **2. Wrangle the data** - + #missing.in.hads.cpm.cal <- cal.df[-which(row.names(cal.df)%in%row.names(obs.df)),] #missing.in.hads.cpm.proj <- proj.df[-which(row.names(proj.df)%in%row.names(obs.df)),] - - + + cal.df <- cal.df[which(row.names(cal.df)%in%row.names(obs.df)),] proj.df <- proj.df[which(row.names(proj.df)%in%row.names(obs.df)),] - - #save the missing outputs + + #save the missing outputs p <- paste0("checkpoint1", v, "_", i, "_", r, "_",y) print(p) #write.csv(missing.in.hads.cpm.cal, paste0(dd, "Debiased/R/QuantileMapping/missing.in.hads/",r,"_",i,"_",v, ".csv")) - + ### Update obs data to 360 days - - #The below is a work around with the HADS dataset having 365 days on leap years - this is to be updateed and corrected when the 360 day sampling is better sorted - + + #The below is a work around with the HADS dataset having 365 days on leap years - this is to be updateed and corrected when the 360 day sampling is better sorted + #Convert obs to 360 day year - has 40 more vars so remove the ones not in cal remove <- c("0229_29", "0430_30", "0731_31", "0930_30", "1130_30") remove <- paste0(remove, collapse = "|") - + obs.df <- obs.df[,!grepl(remove, names(obs.df))] #This still pulls in the 31st Dec 2009 for some reason is in the hads so manual remove obs.df <- obs.df[1:ncol(cal.df)] - + ### Transpose the data sets - + #Obs grid should be cols, observations (time) should be rows for linear scaling - + cal.df <- t(cal.df) proj.df <- t(proj.df) obs.df <- t(obs.df) - - + + ## **3. Empirical Quantile Mapping** - + #(from qmap vignette) - fitQmapQUANT estimates values of the empirical cumulative distribution function of observed and #modelled time series for regularly spaced quantiles. 
doQmapQUANT uses these estimates to perform #quantile mapping p <- paste0("checkpoint2", v, "_", i, "_", r, "_",y) print(p) - + qm1.fit <- fitQmapQUANT(obs.df, cal.df, - wet.day = TRUE, #If wet.day=TRUE the empirical probability of nonzero observations is found (obs>=0) and the corresponding modelled value is selected as a threshold. All modelled values below this threshold are set to zero. If wet.day is numeric the same procedure is performed after setting all obs to zero. - qstep = 0.01, - nboot = 1) #nboot number of bootstrap samples used for estimation of the observed quantiles. - - + wet.day = TRUE, #If wet.day=TRUE the empirical probability of nonzero observations is found (obs>=0) and the corresponding modelled value is selected as a threshold. All modelled values below this threshold are set to zero. If wet.day is numeric the same procedure is performed after setting all obs to zero. + qstep = 0.01, + nboot = 1) #nboot number of bootstrap samples used for estimation of the observed quantiles. + + qm1.hist.a <- doQmapQUANT(cal.df, qm1.fit, type="linear") qm1.hist.b <- doQmapQUANT(cal.df, qm1.fit, type="tricub") - + qm1.proj.a <- doQmapQUANT(proj.df, qm1.fit, type="linear") qm1.proj.b <- doQmapQUANT(proj.df, qm1.fit, type="tricub") - - + + ## **4. Save the data** p <- paste0("checkpoint3", v, "_", i, "_", r, "_", y) print(p) # Save data - lists of dfs for now (will be easier for assessment) results.L <- list(obs.df, cal.df, proj.df, qm1.hist.a, qm1.hist.b, qm1.proj.a, qm1.proj.b) - + names(results.L) <- c("t.obs", "t.cal", "t.proj", "qm1.hist.a", "qm1.hist.b", "qm1.proj.a", "qm1.proj.b") p <- paste0("checkpoint4", v, "_", i, "_", r, "_", y) print(p) base::saveRDS(results.L, file = paste0(dd, "Debiased/R/QuantileMapping/resultsL", r,"_",i,"_",y,"_",v, ".RDS")) - + p <- paste0("checkpoint5", v, "_", i, "_", r, "_", y) print(p) rm(list=setdiff(ls(), c("v", "i", "r", "var", "Runs", "y", "N.new.segments"))) - + gc(reset=TRUE) - - - + + + } } } } } - - diff --git a/R/README.md b/R/README.md index 390d95b9..6552b697 100644 --- a/R/README.md +++ b/R/README.md @@ -1,13 +1,13 @@ # Methods implemented in R -- **/Resampling** - code for Resampling data to different extents (grid sizes) +- **/Resampling** - code for Resampling data to different extents (grid sizes) - **/bias-correction-methods** - bias correction methods implemented in R -- **/comparing-r-and-python** - Comparing various pipeline aspects between R and python +- **/comparing-r-and-python** - Comparing various pipeline aspects between R and python ``` r -og_c <- terra::crop(og, scotland, snap="in") +og_c <- terra::crop(og, scotland, snap="in") plot(og_c$tasmax_1) ``` @@ -181,7 +181,7 @@ Ok there are some differences that I can see from the plot between the two resampled files! 
``` r -## Cropping to a small area to compare with the same orginal HADS file +## Cropping to a small area to compare with the same orginal HADS file i <- rast() ext(i) <- c(200000, 210000, 700000, 710000) ``` @@ -246,7 +246,7 @@ plot(og_ci$tasmax_1) ![](WIP-Comparing-HADs-grids_files/figure-gfm/unnamed-chunk-10-1.png) ``` r -ukcp_c <- terra::crop(ukcp.r, i) +ukcp_c <- terra::crop(ukcp.r, i) plot(ukcp_c$`tasmax_rcp85_land-cpm_uk_2.2km_01_day_19991201-20001130_31`) ``` diff --git a/R/pre-processing/converting_city_crops_to_df.R b/R/pre-processing/converting_city_crops_to_df.R index df9cee3c..7fa85d30 100644 --- a/R/pre-processing/converting_city_crops_to_df.R +++ b/R/pre-processing/converting_city_crops_to_df.R @@ -2,12 +2,12 @@ rm(list=ls()) -# Most input to bias correction methods in R need dfs +# Most input to bias correction methods in R need dfs # Might be better in future to seperate it out differently (ie not run hads and cpm in same loop, or by variable ) library(qmap) library(terra) -library(tidyverse) +library(tidyverse) library(doParallel) dd <- "/mnt/vmfileshare/ClimateData/" @@ -19,55 +19,55 @@ x <- c("London", "Manchester", "Glasgow") # Where to write the results (note subfolder added as name of city/x above) rd <- paste0(dd, "Interim/CPM/Data_as_df/three.cities/") -#file locations for cropped versions of CPM +#file locations for cropped versions of CPM fps <- paste0(dd, "Cropped/three.cities/CPM/") fps <- paste0(fps,x) -#Run the conversion to df in parallel +#Run the conversion to df in parallel cores <- detectCores() cl <- makeCluster(cores[1]-1) registerDoParallel(cl) -foreach(x = x, +foreach(x = x, .packages = c("terra", "tidyverse"), - .errorhandling = 'pass') %dopar% { - + .errorhandling = 'pass') %dopar% { + fp <- fps[grepl(x, fps)] fp <- paste0(fp, "/") files <- list.files(fp) files.paths.all <- paste0(fp, files) - + #group in runs and in variable - + for(v in var){ for(r in run){ rr <- paste0("_",r,"_") files.paths <- files.paths.all[grepl(v, files.paths.all)& grepl(rr, files.paths.all)&grepl("CPM", files.paths.all)] - - # Read in 1st runpath as df with xy coords to ensure overlay - p1 <- files.paths[[1]] + + # Read in 1st runpath as df with xy coords to ensure overlay + p1 <- files.paths[[1]] rast <- rast(p1) - rdf1 <- as.data.frame(rast, xy=T) - - # Load and convert remaining to single col dfs + rdf1 <- as.data.frame(rast, xy=T) + + # Load and convert remaining to single col dfs dfL <- lapply(2:length(files.paths), function(i){ - p <- files.paths[[i]] + p <- files.paths[[i]] rast <- rast(p) - rdf <- as.data.frame(rast) + rdf <- as.data.frame(rast) return(rdf) - }) - + }) + df <- dfL %>% reduce(cbind) df <- cbind(rdf1, df) - + fn <- paste0(rd, x, "/", v, "_","Run",r, ".csv") write.csv(df, fn, row.names = F) - - - } + + + } } } -stopCluster(cl) +stopCluster(cl) gc() @@ -78,74 +78,74 @@ rd <- paste0(dd, "Interim/HadsUK/Data_as_df/three.cities/") fps <- paste0(dd, "Cropped/three.cities/Hads.updated360/") fps <- paste0(fps,x) -#Run the conversion to df in parallel +#Run the conversion to df in parallel cores <- detectCores() cl <- makeCluster(cores[1]-1) registerDoParallel(cl) -foreach(x = x, +foreach(x = x, .packages = c("terra", "tidyverse"), - .errorhandling = 'pass') %dopar% { - + .errorhandling = 'pass') %dopar% { + fp <- fps[grepl(x, fps)] fp <- paste0(fp, "/") files <- list.files(fp) files.paths.all <- paste0(fp, files) - + #group in runs and in variable - + for(v in var){ if(v!="pr"){ - + files.paths <- files.paths.all[grepl(v, files.paths.all)] - - # Read in 1st 
runpath as df with xy coords to ensure overlay - p1 <- files.paths[[1]] + + # Read in 1st runpath as df with xy coords to ensure overlay + p1 <- files.paths[[1]] rast <- rast(p1) - rdf1 <- as.data.frame(rast, xy=T) - - # Load and convert remaining to single col dfs + rdf1 <- as.data.frame(rast, xy=T) + + # Load and convert remaining to single col dfs dfL <- lapply(2:length(files.paths), function(i){ - p <- files.paths[[i]] + p <- files.paths[[i]] rast <- rast(p) - rdf <- as.data.frame(rast) + rdf <- as.data.frame(rast) return(rdf) - }) - + }) + df <- dfL %>% reduce(cbind) df <- cbind(rdf1, df) - + fn <- paste0(rd, x, "/", v, ".csv") write.csv(df, fn, row.names = F) - - + + } else { - + files.paths <- files.paths.all[grepl("rainfall", files.paths.all)] - - # Read in 1st runpath as df with xy coords to ensure overlay - p1 <- files.paths[[1]] + + # Read in 1st runpath as df with xy coords to ensure overlay + p1 <- files.paths[[1]] rast <- rast(p1) - rdf1 <- as.data.frame(rast, xy=T) - - # Load and convert remaining to single col dfs + rdf1 <- as.data.frame(rast, xy=T) + + # Load and convert remaining to single col dfs dfL <- lapply(2:length(files.paths), function(i){ - p <- files.paths[[i]] + p <- files.paths[[i]] rast <- rast(p) - rdf <- as.data.frame(rast) + rdf <- as.data.frame(rast) return(rdf) - }) - + }) + df <- dfL %>% reduce(cbind) df <- cbind(rdf1, df) - - + + fn <- paste0(rd, x, "/", v, ".csv") write.csv(df, fn, row.names = F) - + } } } -stopCluster(cl) +stopCluster(cl) gc() diff --git a/README.md b/README.md index ac4a1b22..409dadcf 100644 --- a/README.md +++ b/README.md @@ -98,7 +98,7 @@ The UK Climate Projections 2018 (UKCP18) dataset offers insights into the potent [HadUK-Grid](https://www.metoffice.gov.uk/research/climate/maps-and-data/data/haduk-grid/haduk-grid) is a comprehensive collection of climate data for the UK, compiled from various land surface observations across the country. This data is organized into a uniform grid to ensure consistent coverage throughout the UK at up to 1km x 1km resolution. The dataset, spanning from 1836 to the present, includes a variety of climate variables such as air temperature, precipitation, sunshine, and wind speed, available on daily, monthly, seasonal, and annual timescales. ### Geographical Dataset -The geographical dataset can be used for visualising climate data. It mainly includes administrative boundaries published by the Office for National Statistics (ONS). The dataset is sharable under the [Open Government Licence v.3.0](https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/) and is available for download via this [link](https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/NUTS_Level_1_January_2018_FCB_in_the_United_Kingdom_2022/FeatureServer/replicafilescache/NUTS_Level_1_January_2018_FCB_in_the_United_Kingdom_2022_7279368953270783580.zip). We include a copy in the `data/Geofiles` folder for convenience. In addition, the clips for three cities' boundaries from the same dataset are copied to `three.cities` subfolder. +The geographical dataset can be used for visualising climate data. It mainly includes administrative boundaries published by the Office for National Statistics (ONS). 
The dataset is sharable under the [Open Government Licence v.3.0](https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/) and is available for download via this [link](https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/NUTS_Level_1_January_2018_FCB_in_the_United_Kingdom_2022/FeatureServer/replicafilescache/NUTS_Level_1_January_2018_FCB_in_the_United_Kingdom_2022_7279368953270783580.zip). We include a copy in the `data/Geofiles` folder for convenience. In addition, the clips for three cities' boundaries from the same dataset are copied to `three.cities` subfolder. ## Why Bias Correction?
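For reference, the empirical quantile mapping pattern that recurs throughout the LCAT and three-cities scripts touched in this patch comes down to the `fitQmapQUANT`/`doQmapQUANT` pair from the `qmap` package. The sketch below is an illustration only: it uses synthetic vectors rather than any of the project data frames, and assumes the `qmap` package is installed.

``` r
# Minimal empirical quantile mapping sketch with synthetic data (not project files)
library(qmap)

set.seed(1)
obs <- rgamma(360 * 30, shape = 2, scale = 3)  # stand-in "observed" daily series
mod <- rgamma(360 * 30, shape = 2, scale = 4)  # stand-in "modelled" daily series

# Fit the empirical CDF mapping over the calibration period
qm.fit <- fitQmapQUANT(obs, mod,
                       wet.day = FALSE, # TRUE (or a numeric threshold) for precipitation
                       qstep   = 0.01,
                       nboot   = 1)     # bootstrap samples for the observed quantiles

# Apply the mapping to the modelled series (linear or tricubic interpolation)
mod.bc.linear <- doQmapQUANT(mod, qm.fit, type = "linear")
mod.bc.tricub <- doQmapQUANT(mod, qm.fit, type = "tricub")
```

In the scripts above, `wet.day = FALSE` is used for the temperature variables and `wet.day = TRUE` for precipitation, so that modelled values below the observed wet-day threshold are set to zero.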