diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..5397d1894 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +planemo/ diff --git a/tools/regionalgam/ab-index.R b/tools/regionalgam/ab-index.R index 8db00997e..0c336715d 100644 --- a/tools/regionalgam/ab-index.R +++ b/tools/regionalgam/ab-index.R @@ -1,14 +1,13 @@ #!/usr/bin/env Rscript -args = commandArgs(trailingOnly=TRUE) +args <- commandArgs(trailingOnly = TRUE) source(args[1]) +input <- data.frame(data.table::fread(args[2])) +pheno <- read.table(args[3], header = TRUE, sep = " ") -tryCatch({input = read.table(args[2], header=TRUE,sep=" ")},finally={input = read.table(args[2], header=TRUE,sep=",")}) -pheno = read.table(args[3], header=TRUE,sep=" ") - -if("TREND" %in% colnames(input)){ - input <- input[input$TREND==1,c("SPECIES","SITE","YEAR","MONTH","DAY","COUNT")] +if ("TREND" %in% colnames(input)) { + input <- input[input$TREND == 1, c("SPECIES", "SITE", "YEAR", "MONTH", "DAY", "COUNT")] } -data.index <- abundance_index(input, pheno) -write.table(data.index, file="data.index", row.names=FALSE, sep=" ") +data_index <- abundance_index(input, pheno) +write.table(data_index, file = "data_index", row.names = FALSE, sep = " ") diff --git a/tools/regionalgam/ab-index.xml b/tools/regionalgam/ab-index.xml index ef61f0e01..b4335c877 100644 --- a/tools/regionalgam/ab-index.xml +++ b/tools/regionalgam/ab-index.xml @@ -3,12 +3,14 @@ regionalgam_macros.xml - + + r-data.table + @@ -16,7 +18,7 @@ - + diff --git a/tools/regionalgam/autocorr-res-acf.R b/tools/regionalgam/autocorr-res-acf.R index 6bb4c8aa2..586aa36bd 100644 --- a/tools/regionalgam/autocorr-res-acf.R +++ b/tools/regionalgam/autocorr-res-acf.R @@ -1,10 +1,10 @@ #!/usr/bin/env Rscript -args = commandArgs(trailingOnly=TRUE) +args <- commandArgs(trailingOnly = TRUE) load(args[1]) -png('output-acf.png',width = 480, height = 480, units = "px") -graph<-acf(residuals(mod,type="normalized"),plot=FALSE) -plot(graph, main="Acf residuals") +png("output_acf.png", width = 480, height = 480, units = "px") +graph <- acf(residuals(mod, type = "normalized"), plot = FALSE) +plot(graph, main = "Acf residuals") invisible(dev.off()) -sink("output-acf.txt") +sink("output_acf.txt") print(graph) diff --git a/tools/regionalgam/autocorr-res-acf.xml b/tools/regionalgam/autocorr-res-acf.xml index cab84170c..0d381e771 100644 --- a/tools/regionalgam/autocorr-res-acf.xml +++ b/tools/regionalgam/autocorr-res-acf.xml @@ -6,17 +6,17 @@ - - + + @@ -31,7 +31,7 @@ 0) { - x = x[-rm_inds] - y = y[-rm_inds] + x <- x[-rm_inds] + y <- y[-rm_inds] } # Determine values of trapezoids under curve Get inds - inds = 1:(length(x) - 1) + inds <- 1:(length(x) - 1) # Determine area using trapezoidal rule Area = ( (b1 + b2)/2 ) * h where b1 and b2 are lengths of bases # (the parallel sides) and h is the height (the perpendicular distance between two bases) - areas = ((y[inds] + y[inds + 1])/2) * diff(x) + areas <- ((y[inds] + y[inds + 1]) / 2) * diff(x) # total area is sum of all trapezoid areas - tot_area = sum(areas) + tot_area <- sum(areas) # Return total area return(tot_area) @@ -136,23 +136,23 @@ trap_area = function(x, y = NULL) { -trap_index = function(sp_data, data_col = "IMP", time_col = "DAYNO", by_col = c("SPECIES", "SITE", "YEAR")) { +trap_index <- function(sp_data, data_col = "IMP", time_col = "DAYNO", by_col = c("SPECIES", "SITE", "YEAR")) { # Build output data.frame - out_obj = unique(sp_data[, by_col]) + out_obj <- unique(sp_data[, by_col]) # Set row.names to be equal to collapsing of output rows (will be unique, you need them to make uploading # values back to data.frame will be easier) - row.names(out_obj) = apply(out_obj, 1, paste, collapse = "_") + row.names(out_obj) <- apply(out_obj, 1, paste, collapse = "_") # Using this row.names from out_obj above as index in by function to loop through values all unique combs # of by_cols and fit trap_area to data - ind_dat = by(sp_data[, c(time_col, data_col)], apply(sp_data[, by_col], 1, paste, collapse = "_"), trap_area) + ind_dat <- by(sp_data[, c(time_col, data_col)], apply(sp_data[, by_col], 1, paste, collapse = "_"), trap_area) # Add this data to output object - out_obj[names(ind_dat), "SINDEX"] = round(ind_dat/7, 1) + out_obj[names(ind_dat), "SINDEX"] <- round(ind_dat / 7, 1) # Set row.names to defaults - row.names(out_obj) = NULL + row.names(out_obj) <- NULL # Return output object return(out_obj) @@ -161,36 +161,36 @@ trap_index = function(sp_data, data_col = "IMP", time_col = "DAYNO", by_col = c( #' flight_curve Function #' This function compute the flight curve across and years -#' @param your_dataset A data.frame containing original species count data. The data format is a csv (comma "," separated) with 6 columns with headers, namely "species","transect_id","visit_year","visit_month","visit_day","sp_count" +#' @param your_dataset A data.frame containing original species count data. The data format is a csv (comma "',"' separated) with 6 columns with headers, namely "species","transect_id","visit_year","visit_month","visit_day","sp_count" #' @keywords standardize flight curve #' @export #' @examples #' flight_curve() -flight_curve <- function(your_dataset, GamFamily = 'nb', MinVisit = 2, MinOccur = 1) { +flight_curve <- function(your_dataset, GamFamily = "nb", MinVisit = 2, MinOccur = 1) { # nolint - if("mgcv" %in% installed.packages() == "FALSE") { + if ("mgcv" %in% installed.packages() == "FALSE") { print("mgcv package is not installed.") x <- readline("Do you want to install it? Y/N") - if (x == 'Y') { + if (x == "Y") { install.packages("mgcv") } - if (x == 'N') { + if (x == "N") { stop("flight curve can not be computed without the mgcv package, sorry") } } flight_pheno <- data.frame() - your_dataset$DAYNO <- strptime(paste(your_dataset$DAY, your_dataset$MONTH, + your_dataset$DAYNO <- strptime(paste(your_dataset$DAY, your_dataset$MONTH, # nolint your_dataset$YEAR, sep = "/"), "%d/%m/%Y")$yday + 1 dataset <- your_dataset[, c("SPECIES", "SITE", "YEAR", "MONTH", "DAY", "DAYNO", "COUNT")] sample_year <- unique(dataset$YEAR) sample_year <- sample_year[order(sample_year)] - if (length(sample_year) >1 ) { + if (length(sample_year) > 1) { for (y in sample_year) { dataset_y <- dataset[dataset$YEAR == y, ] @@ -201,7 +201,7 @@ flight_curve <- function(your_dataset, GamFamily = 'nb', MinVisit = 2, MinOccur dataset_y <- dataset_y[dataset_y$SITE %in% vis$SITE[vis$x >= MinVisit], ] nsite <- length(unique(dataset_y$SITE)) if (nsite < 1) { - print(paste("No sites with sufficient visits and occurence, MinOccur:", MinOccur, " MinVisit: ", MinVisit, " for " , dataset$SPECIES[1],"at year", y)) + print(paste("No sites with sufficient visits and occurence, MinOccur:", MinOccur, " MinVisit: ", MinVisit, " for ", dataset$SPECIES[1], "at year", y)) next } # Determine missing days and add to dataset @@ -211,14 +211,14 @@ flight_curve <- function(your_dataset, GamFamily = 'nb', MinVisit = 2, MinOccur 200, replace = F)]), ] sp_data_all <- sp_data_all } - sp_data_all$trimDAYNO <- sp_data_all$DAYNO - min(sp_data_all$DAYNO) + 1 - print(paste("Fitting the GAM for",as.character(sp_data_all$SPECIES[1]),"and year",y,"with",length(unique(sp_data_all$SITE)),"sites :",Sys.time())) - if(length(unique(sp_data_all$SITE))>1){ - gam_obj_site <- try(mgcv::gam(COUNT ~ s(trimDAYNO, bs = "cr") + as.factor(SITE) -1, + sp_data_all$trimDAYNO <- sp_data_all$DAYNO - min(sp_data_all$DAYNO) + 1 # nolint + print(paste("Fitting the GAM for", as.character(sp_data_all$SPECIES[1]), "and year", y, "with", length(unique(sp_data_all$SITE)), "sites :", Sys.time())) + if (length(unique(sp_data_all$SITE)) > 1) { + gam_obj_site <- try(mgcv::gam(COUNT ~ s(trimDAYNO, bs = "cr") + as.factor(SITE) - 1, data = sp_data_all, family = GamFamily), silent = TRUE) } else { - gam_obj_site <- try(mgcv::gam(COUNT ~ s(trimDAYNO, bs = "cr") -1, + gam_obj_site <- try(mgcv::gam(COUNT ~ s(trimDAYNO, bs = "cr") - 1, data = sp_data_all, family = GamFamily), silent = TRUE) } # Give a second try if the GAM does not converge the first time @@ -232,18 +232,18 @@ flight_curve <- function(your_dataset, GamFamily = 'nb', MinVisit = 2, MinOccur else { sp_data_all <- sp_data_all } - sp_data_all$trimDAYNO <- sp_data_all$DAYNO - min(sp_data_all$DAYNO) + 1 - print(paste("Fitting the GAM for",sp_data_all$SPECIES[1],"at year", y,"with",length(unique(sp_data_all$SITE)),"sites :",Sys.time(),"second try")) - if(length(unique(sp_data_all$SITE))>1){ - gam_obj_site <- try(mgcv::gam(COUNT ~ s(trimDAYNO, bs = "cr") + as.factor(SITE) -1, + sp_data_all$trimDAYNO <- sp_data_all$DAYNO - min(sp_data_all$DAYNO) + 1 # nolint + print(paste("Fitting the GAM for", sp_data_all$SPECIES[1], "at year", y, "with", length(unique(sp_data_all$SITE)), "sites :", Sys.time(), "second try")) + if (length(unique(sp_data_all$SITE)) > 1) { + gam_obj_site <- try(mgcv::gam(COUNT ~ s(trimDAYNO, bs = "cr") + as.factor(SITE) - 1, data = sp_data_all, family = GamFamily), silent = TRUE) } else { - gam_obj_site <- try(mgcv::gam(COUNT ~ s(trimDAYNO, bs = "cr") -1, + gam_obj_site <- try(mgcv::gam(COUNT ~ s(trimDAYNO, bs = "cr") - 1, data = sp_data_all, family = GamFamily), silent = TRUE) } if (class(gam_obj_site)[1] == "try-error") { - print(paste("Error in fitting the flight period for",sp_data_all$SPECIES[1],"at year", y,"no convergence after two trial")) + print(paste("Error in fitting the flight period for", sp_data_all$SPECIES[1], "at year", y, "no convergence after two trial")) sp_data_all[, "FITTED"] <- NA sp_data_all[, "COUNT_IMPUTED"] <- NA sp_data_all[is.na(sp_data_all$COUNT), "COUNT_IMPUTED"] <- NA @@ -258,8 +258,8 @@ flight_curve <- function(your_dataset, GamFamily = 'nb', MinVisit = 2, MinOccur sp_data_all[sp_data_all$trimDAYNO < 60, "FITTED"] <- 0 sp_data_all[sp_data_all$trimDAYNO > 305, "FITTED"] <- 0 # if infinite number in predict replace with NA. - if(sum(is.infinite(sp_data_all[, "FITTED"]))>0){ - print(paste("Error in the flight period for",sp_data_all$SPECIES[1],"at year", y,"weird predicted values")) + if (sum(is.infinite(sp_data_all[, "FITTED"])) > 0) { + print(paste("Error in the flight period for", sp_data_all$SPECIES[1], "at year", y, "weird predicted values")) sp_data_all[, "FITTED"] <- NA sp_data_all[, "COUNT_IMPUTED"] <- NA sp_data_all[is.na(sp_data_all$COUNT), "COUNT_IMPUTED"] <- NA @@ -275,10 +275,10 @@ flight_curve <- function(your_dataset, GamFamily = 'nb', MinVisit = 2, MinOccur # Rename sum column names(site_sums)[names(site_sums) == "x"] <- "SITE_YR_FSUM" # Add data to sp_data data.frame (ensure merge does not sort the data!) - sp_data_all = merge(sp_data_all, site_sums, by <- c("SITE"), + sp_data_all <- merge(sp_data_all, site_sums, by <- c("SITE"), all = TRUE, sort = FALSE) # Calculate normalized values - sp_data_all[, "NM"] <- sp_data_all$FITTED/sp_data_all$SITE_YR_FSUM + sp_data_all[, "NM"] <- sp_data_all$FITTED / sp_data_all$SITE_YR_FSUM } } } @@ -291,8 +291,8 @@ flight_curve <- function(your_dataset, GamFamily = 'nb', MinVisit = 2, MinOccur sp_data_all[sp_data_all$trimDAYNO < 60, "FITTED"] <- 0 sp_data_all[sp_data_all$trimDAYNO > 305, "FITTED"] <- 0 # if infinite number in predict replace with NA. - if(sum(is.infinite(sp_data_all[, "FITTED"]))>0){ - print(paste("Error in the flight period for",sp_data_all$SPECIES[1],"at year", y,"weird predicted values")) + if (sum(is.infinite(sp_data_all[, "FITTED"])) > 0) { + print(paste("Error in the flight period for", sp_data_all$SPECIES[1], "at year", y, "weird predicted values")) sp_data_all[, "FITTED"] <- NA sp_data_all[, "COUNT_IMPUTED"] <- NA sp_data_all[is.na(sp_data_all$COUNT), "COUNT_IMPUTED"] <- NA @@ -303,15 +303,15 @@ flight_curve <- function(your_dataset, GamFamily = 'nb', MinVisit = 2, MinOccur sp_data_all[is.na(sp_data_all$COUNT), "COUNT_IMPUTED"] <- sp_data_all$FITTED[is.na(sp_data_all$COUNT)] # Define the flight curve from the fitted values and append them over # years (this is one flight curve per year for all site) - site_sums = aggregate(sp_data_all$FITTED, by = list(SITE = sp_data_all$SITE), + site_sums <- aggregate(sp_data_all$FITTED, by = list(SITE = sp_data_all$SITE), FUN = sum) # Rename sum column - names(site_sums)[names(site_sums) == "x"] = "SITE_YR_FSUM" + names(site_sums)[names(site_sums) == "x"] <- "SITE_YR_FSUM" # Add data to sp_data data.frame (ensure merge does not sort the data!) - sp_data_all = merge(sp_data_all, site_sums, by = c("SITE"), all = TRUE, + sp_data_all <- merge(sp_data_all, site_sums, by = c("SITE"), all = TRUE, sort = FALSE) # Calculate normalized values - sp_data_all[, "NM"] = sp_data_all$FITTED/sp_data_all$SITE_YR_FSUM + sp_data_all[, "NM"] <- sp_data_all$FITTED / sp_data_all$SITE_YR_FSUM } } sp_data_filled <- sp_data_all @@ -321,7 +321,7 @@ flight_curve <- function(your_dataset, GamFamily = 'nb', MinVisit = 2, MinOccur sp_data_filled$DAYNO, sep = "_")), ] flight_curve <- flight_curve[order(flight_curve$DAYNO), ] # bind if exist else create - if (is.na(flight_curve$nm[1])) next() + if (is.na(flight_curve$nm[1])) next() # nolint flight_pheno <- rbind(flight_pheno, flight_curve) @@ -337,7 +337,7 @@ flight_curve <- function(your_dataset, GamFamily = 'nb', MinVisit = 2, MinOccur dataset_y <- dataset_y[dataset_y$SITE %in% vis$SITE[vis$x >= MinVisit], ] nsite <- length(unique(dataset_y$SITE)) if (nsite < 1) { - stop(paste("No sites with sufficient visits and occurence, MinOccur:", MinOccur, " MinVisit: ", MinVisit, " for " ,dataset$SPECIES[1],"at year", y)) + stop(paste("No sites with sufficient visits and occurence, MinOccur:", MinOccur, " MinVisit: ", MinVisit, " for ", dataset$SPECIES[1], "at year", y)) } # Determine missing days and add to dataset sp_data_all <- year_day_func(dataset_y) @@ -348,14 +348,14 @@ flight_curve <- function(your_dataset, GamFamily = 'nb', MinVisit = 2, MinOccur else { sp_data_all <- sp_data_all } - sp_data_all$trimDAYNO <- sp_data_all$DAYNO - min(sp_data_all$DAYNO) + 1 - print(paste("Fitting the GAM for",sp_data_all$SPECIES[1],"at year", y,":",Sys.time())) - if(length(unique(sp_data_all$SITE))>1){ - gam_obj_site <- try(mgcv::gam(COUNT ~ s(trimDAYNO, bs = "cr") + as.factor(SITE) -1, + sp_data_all$trimDAYNO <- sp_data_all$DAYNO - min(sp_data_all$DAYNO) + 1 # nolint + print(paste("Fitting the GAM for", sp_data_all$SPECIES[1], "at year", y, ":", Sys.time())) + if (length(unique(sp_data_all$SITE)) > 1) { + gam_obj_site <- try(mgcv::gam(COUNT ~ s(trimDAYNO, bs = "cr") + as.factor(SITE) - 1, data = sp_data_all, family = GamFamily), silent = TRUE) } else { - gam_obj_site <- try(mgcv::gam(COUNT ~ s(trimDAYNO, bs = "cr") -1, + gam_obj_site <- try(mgcv::gam(COUNT ~ s(trimDAYNO, bs = "cr") - 1, data = sp_data_all, family = GamFamily), silent = TRUE) } # Give a second try if the GAM does not converge the first time @@ -369,18 +369,18 @@ flight_curve <- function(your_dataset, GamFamily = 'nb', MinVisit = 2, MinOccur else { sp_data_all <- sp_data_all } - sp_data_all$trimDAYNO <- sp_data_all$DAYNO - min(sp_data_all$DAYNO) + 1 - print(paste("Fitting the GAM for",sp_data_all$SPECIES[1],"at year", y,"with",length(unique(sp_data_all$SITE)),"sites :",Sys.time(),"second try")) - if(length(unique(sp_data_all$SITE))>1){ + sp_data_all$trimDAYNO <- sp_data_all$DAYNO - min(sp_data_all$DAYNO) + 1 # nolint + print(paste("Fitting the GAM for", sp_data_all$SPECIES[1], "at year", y, "with", length(unique(sp_data_all$SITE)), "sites :", Sys.time(), "second try")) + if (length(unique(sp_data_all$SITE)) > 1) { gam_obj_site <- try(mgcv::bam(COUNT ~ s(trimDAYNO, bs = "cr") + as.factor(SITE) - 1, data = sp_data_all, family = GamFamily), silent = TRUE) } else { - gam_obj_site <- try(mgcv::gam(COUNT ~ s(trimDAYNO, bs = "cr") -1, + gam_obj_site <- try(mgcv::gam(COUNT ~ s(trimDAYNO, bs = "cr") - 1, data = sp_data_all, family = GamFamily), silent = TRUE) } if (class(gam_obj_site)[1] == "try-error") { - print(paste("Error in fitting the flight period for",sp_data_all$SPECIES[1],"at year", y,"no convergence after two trial")) + print(paste("Error in fitting the flight period for", sp_data_all$SPECIES[1], "at year", y, "no convergence after two trial")) sp_data_all[, "FITTED"] <- NA sp_data_all[, "COUNT_IMPUTED"] <- NA sp_data_all[is.na(sp_data_all$COUNT), "COUNT_IMPUTED"] <- NA @@ -395,8 +395,8 @@ flight_curve <- function(your_dataset, GamFamily = 'nb', MinVisit = 2, MinOccur sp_data_all[sp_data_all$trimDAYNO < 60, "FITTED"] <- 0 sp_data_all[sp_data_all$trimDAYNO > 305, "FITTED"] <- 0 # if infinite number in predict replace with NA. - if(sum(is.infinite(sp_data_all[, "FITTED"]))>0){ - print(paste("Error in the flight period for",sp_data_all$SPECIES[1],"at year", y,"weird predicted values")) + if (sum(is.infinite(sp_data_all[, "FITTED"])) > 0) { + print(paste("Error in the flight period for", sp_data_all$SPECIES[1], "at year", y, "weird predicted values")) sp_data_all[, "FITTED"] <- NA sp_data_all[, "COUNT_IMPUTED"] <- NA sp_data_all[is.na(sp_data_all$COUNT), "COUNT_IMPUTED"] <- NA @@ -412,10 +412,10 @@ flight_curve <- function(your_dataset, GamFamily = 'nb', MinVisit = 2, MinOccur # Rename sum column names(site_sums)[names(site_sums) == "x"] <- "SITE_YR_FSUM" # Add data to sp_data data.frame (ensure merge does not sort the data!) - sp_data_all = merge(sp_data_all, site_sums, by <- c("SITE"), + sp_data_all <- merge(sp_data_all, site_sums, by <- c("SITE"), all = TRUE, sort = FALSE) # Calculate normalized values - sp_data_all[, "NM"] <- sp_data_all$FITTED/sp_data_all$SITE_YR_FSUM + sp_data_all[, "NM"] <- sp_data_all$FITTED / sp_data_all$SITE_YR_FSUM } } } @@ -428,8 +428,8 @@ flight_curve <- function(your_dataset, GamFamily = 'nb', MinVisit = 2, MinOccur sp_data_all[sp_data_all$trimDAYNO < 60, "FITTED"] <- 0 sp_data_all[sp_data_all$trimDAYNO > 305, "FITTED"] <- 0 # if infinite number in predict replace with NA. - if(sum(is.infinite(sp_data_all[, "FITTED"]))>0){ - print(paste("Error in the flight period for",sp_data_all$SPECIES[1],"at year", y,"weird predicted values")) + if (sum(is.infinite(sp_data_all[, "FITTED"])) > 0) { + print(paste("Error in the flight period for", sp_data_all$SPECIES[1], "at year", y, "weird predicted values")) sp_data_all[, "FITTED"] <- NA sp_data_all[, "COUNT_IMPUTED"] <- NA sp_data_all[is.na(sp_data_all$COUNT), "COUNT_IMPUTED"] <- NA @@ -440,15 +440,15 @@ flight_curve <- function(your_dataset, GamFamily = 'nb', MinVisit = 2, MinOccur sp_data_all[is.na(sp_data_all$COUNT), "COUNT_IMPUTED"] <- sp_data_all$FITTED[is.na(sp_data_all$COUNT)] # Define the flight curve from the fitted values and append them over # years (this is one flight curve per year for all site) - site_sums = aggregate(sp_data_all$FITTED, by = list(SITE = sp_data_all$SITE), + site_sums <- aggregate(sp_data_all$FITTED, by = list(SITE = sp_data_all$SITE), FUN = sum) # Rename sum column - names(site_sums)[names(site_sums) == "x"] = "SITE_YR_FSUM" + names(site_sums)[names(site_sums) == "x"] <- "SITE_YR_FSUM" # Add data to sp_data data.frame (ensure merge does not sort the data!) - sp_data_all = merge(sp_data_all, site_sums, by = c("SITE"), all = TRUE, + sp_data_all <- merge(sp_data_all, site_sums, by = c("SITE"), all = TRUE, sort = FALSE) # Calculate normalized values - sp_data_all[, "NM"] = sp_data_all$FITTED/sp_data_all$SITE_YR_FSUM + sp_data_all[, "NM"] <- sp_data_all$FITTED / sp_data_all$SITE_YR_FSUM } } sp_data_filled <- sp_data_all @@ -462,7 +462,7 @@ flight_curve <- function(your_dataset, GamFamily = 'nb', MinVisit = 2, MinOccur } return(flight_pheno) -} +} # nolint #' abundance_index Function @@ -475,9 +475,9 @@ flight_curve <- function(your_dataset, GamFamily = 'nb', MinVisit = 2, MinOccur #' @examples #' abundance_index() -abundance_index <- function(your_dataset,flight_pheno) { +abundance_index <- function(your_dataset, flight_pheno) { -your_dataset$DAYNO <- strptime(paste(your_dataset$DAY, your_dataset$MONTH, +your_dataset$DAYNO <- strptime(paste(your_dataset$DAY, your_dataset$MONTH, # nolint your_dataset$YEAR, sep = "/"), "%d/%m/%Y")$yday + 1 dataset <- your_dataset[, c("SPECIES", "SITE", "YEAR", "MONTH", @@ -487,7 +487,7 @@ sample_year <- unique(dataset$YEAR) sample_year <- sample_year[order(sample_year)] -if (length(sample_year)>1){ +if (length(sample_year) > 1) { for (y in sample_year) { @@ -496,7 +496,7 @@ for (y in sample_year) { dataset_y <- dataset[dataset$YEAR == y, ] sp_data_site <- year_day_func(dataset_y) - sp_data_site$trimDAYNO <- sp_data_site$DAYNO - min(sp_data_site$DAYNO) + 1 + sp_data_site$trimDAYNO <- sp_data_site$DAYNO - min(sp_data_site$DAYNO) + 1 # nolint sp_data_site <- merge(sp_data_site, year_pheno[, c("DAYNO", "nm")], by = c("DAYNO"), all.x = TRUE, sort = FALSE) @@ -515,11 +515,11 @@ for (y in sample_year) { names(pro_count_agg) <- c("SITE", "PROP_PHENO_SAMPLED") # remove samples outside the monitoring window - sp_data_site$COUNT[sp_data_site$nm==0] <- NA + sp_data_site$COUNT[sp_data_site$nm == 0] <- NA # Compute the regional GAM index - if(length(unique(sp_data_site$SITE))>1){ + if (length(unique(sp_data_site$SITE)) > 1) { glm_obj_site <- glm(COUNT ~ factor(SITE) + offset(log(nm)) - 1, data = sp_data_site, family = quasipoisson(link = "log"), control = list(maxit = 100)) } else { @@ -547,7 +547,7 @@ for (y in sample_year) { "site_week"] ## Compute the regional GAM index - print(paste("Compute index for",sp_data_site$SPECIES[1],"at year", y,"for",length(unique(sp_data_site$SITE)),"sites:",Sys.time())) + print(paste("Compute index for", sp_data_site$SPECIES[1], "at year", y, "for", length(unique(sp_data_site$SITE)), "sites:", Sys.time())) regional_gam_index <- trap_index(sp_data_site, data_col = "COUNT_IMPUTED", time_col = "DAYNO", by_col = c("SPECIES", "SITE", "YEAR")) @@ -574,7 +574,7 @@ for (y in sample_year) { dataset_y <- dataset[dataset$YEAR == y, ] sp_data_site <- year_day_func(dataset_y) - sp_data_site$trimDAYNO <- sp_data_site$DAYNO - min(sp_data_site$DAYNO) + 1 + sp_data_site$trimDAYNO <- sp_data_site$DAYNO - min(sp_data_site$DAYNO) + 1 # nolint sp_data_site <- merge(sp_data_site, year_pheno[, c("DAYNO", "nm")], by = c("DAYNO"), all.x = TRUE, sort = FALSE) @@ -593,10 +593,10 @@ for (y in sample_year) { names(pro_count_agg) <- c("SITE", "PROP_PHENO_SAMPLED") # remove samples outside the monitoring window - sp_data_site$COUNT[sp_data_site$nm==0] <- NA + sp_data_site$COUNT[sp_data_site$nm == 0] <- NA # Compute the regional GAM index - if(length(unique(sp_data_site$SITE))>1){ + if (length(unique(sp_data_site$SITE)) > 1) { glm_obj_site <- glm(COUNT ~ factor(SITE) + offset(log(nm)) - 1, data = sp_data_site, family = quasipoisson(link = "log"), control = list(maxit = 100)) } else { @@ -624,7 +624,7 @@ for (y in sample_year) { "site_week"] # Compute the regional gam index - print(paste("Compute index for",sp_data_site$SPECIES[1],"at year", y,"for",length(unique(sp_data_site$SITE)),"sites:",Sys.time())) + print(paste("Compute index for", sp_data_site$SPECIES[1], "at year", y, "for", length(unique(sp_data_site$SITE)), "sites:", Sys.time())) regional_gam_index <- trap_index(sp_data_site, data_col = "COUNT_IMPUTED", time_col = "DAYNO", by_col = c("SPECIES", "SITE", "YEAR")) diff --git a/tools/regionalgam/flight-curve.R b/tools/regionalgam/flight-curve.R index 353ccd197..0bcd6dafb 100644 --- a/tools/regionalgam/flight-curve.R +++ b/tools/regionalgam/flight-curve.R @@ -1,8 +1,8 @@ #!/usr/bin/env Rscript -args = commandArgs(trailingOnly=TRUE) +args <- commandArgs(trailingOnly = TRUE) source(args[1]) #TODO replace by library(regionalGAM) if available as official package from bioconda -tryCatch({input = read.table(args[2], header=TRUE,sep=" ")},finally={input = read.table(args[2], header=TRUE,sep=",")}) -dataset1 <- input[,c("SPECIES", "SITE", "YEAR", "MONTH", "DAY", "COUNT")] +input <- data.frame(data.table::fread(args[2])) +dataset1 <- input[, c("SPECIES", "SITE", "YEAR", "MONTH", "DAY", "COUNT")] pheno <- flight_curve(dataset1, MinVisit = args[3], MinOccur = args[4]) -write.table(pheno, file="pheno", row.names=FALSE, sep=" ") +write.table(pheno, file = "pheno", row.names = FALSE, sep = " ") diff --git a/tools/regionalgam/flight-curve.xml b/tools/regionalgam/flight-curve.xml index 7a0e26276..2eb8add3b 100644 --- a/tools/regionalgam/flight-curve.xml +++ b/tools/regionalgam/flight-curve.xml @@ -5,14 +5,15 @@ r-mgcv + r-data.table @@ -34,7 +35,7 @@ - + @@ -50,7 +51,7 @@ This tool is an implementation of the flight_curve function RegionalGAM package: This function computes the annual phenology on a specific region. -The output tabular file can be used to impute expected count values with the abundance index computation tool. +The output tabular file can be used to impute expected count values with the abundance index computation tool. | diff --git a/tools/regionalgam/glmmpql.R b/tools/regionalgam/glmmpql.R index aca92351d..87f806f7d 100644 --- a/tools/regionalgam/glmmpql.R +++ b/tools/regionalgam/glmmpql.R @@ -2,15 +2,15 @@ library(nlme) library(MASS) -args = commandArgs(trailingOnly=TRUE) -input = read.table(args[1], header=TRUE,sep=" ") -glmm.mod_fullyear <- glmmPQL(regional_gam~ as.factor(YEAR)-1,data=input,family=quasipoisson,random=~1|SITE, correlation = corAR1(form = ~ YEAR | SITE),verbose = FALSE) +args <- commandArgs(trailingOnly = TRUE) +input <- read.table(args[1], header = TRUE, sep = " ") +glmm_mod_fullyear <- glmmPQL(regional_gam ~ as.factor(YEAR) - 1, data = input, family = quasipoisson, random = ~ 1 | SITE, correlation = corAR1(form = ~ YEAR | SITE), verbose = FALSE) -col.index <- as.numeric(glmm.mod_fullyear$coefficients$fixed) +col_index <- as.numeric(glmm_mod_fullyear$coefficients$fixed) year <- unique(input$YEAR) -write.table(col.index, file="output-glmmpql", row.names=FALSE, sep=" ") +write.table(col_index, file = "output-glmmpql", row.names = FALSE, sep = " ") -png('output-plot.png') -plot(year,col.index,type='o', xlab="year",ylab="collated index") +png("output-plot.png") +plot(year, col_index, type = "o", xlab = "year", ylab = "collated index") invisible(dev.off()) diff --git a/tools/regionalgam/gls-adjusted.R b/tools/regionalgam/gls-adjusted.R index 149913a18..215015720 100644 --- a/tools/regionalgam/gls-adjusted.R +++ b/tools/regionalgam/gls-adjusted.R @@ -2,15 +2,15 @@ library(nlme) library(MASS) -args = commandArgs(trailingOnly=TRUE) -input1 = read.table(args[1], header=TRUE) #input1=col.index =glmmpql-output -input2 = read.table(args[2], header=TRUE,sep=" ") #input2=data.index =abundance_index-output +args <- commandArgs(trailingOnly = TRUE) +input1 <- read.table(args[1], header = TRUE) +input2 <- read.table(args[2], header = TRUE, sep = " ") -input1<-as.matrix(input1) +input1 <- as.matrix(input1) year <- unique(input2$YEAR) -mod <- gls(input1~year, correlation = corARMA(p=2)) -summary<-summary(mod) +mod <- gls(input1 ~ year, correlation = corARMA(p = 2)) +summary <- summary(mod) save(mod, file = "mod_adjusted.rda") -capture.output(summary, file="mod_adjusted-summary.txt") +capture.output(summary, file = "mod_adjusted-summary.txt") diff --git a/tools/regionalgam/gls.R b/tools/regionalgam/gls.R index 36a8555a0..ef853266e 100644 --- a/tools/regionalgam/gls.R +++ b/tools/regionalgam/gls.R @@ -2,15 +2,15 @@ library(nlme) library(MASS) -args = commandArgs(trailingOnly=TRUE) -input1 = read.table(args[1], header=TRUE) #input1=col.index =glmmpql-output -input2 = read.table(args[2], header=TRUE,sep=" ") #input2=data.index =abundance_index-output +args <- commandArgs(trailingOnly = TRUE) +input1 <- read.table(args[1], header = TRUE) +input2 <- read.table(args[2], header = TRUE, sep = " ") -input1<-as.matrix(input1) +input1 <- as.matrix(input1) year <- unique(input2$YEAR) -mod <- gls(input1~year) -summary<-summary(mod) +mod <- gls(input1 ~ year) +summary <- summary(mod) save(mod, file = "mod.rda") -capture.output(summary, file="mod-summary.txt") +capture.output(summary, file = "mod-summary.txt") diff --git a/tools/regionalgam/plot-trend.R b/tools/regionalgam/plot-trend.R index 86ed9f297..e51a949c7 100644 --- a/tools/regionalgam/plot-trend.R +++ b/tools/regionalgam/plot-trend.R @@ -2,22 +2,22 @@ library(nlme) library(MASS) -args = commandArgs(trailingOnly=TRUE) -input = read.table(args[1], header=TRUE,sep=" ") #input=data.index =ab_index-output +args <- commandArgs(trailingOnly = TRUE) +input <- read.table(args[1], header = TRUE, sep = " ") load(args[2]) -glmm.mod_fullyear <- glmmPQL(regional_gam~ as.factor(YEAR)-1,data=input,family=quasipoisson,random=~1|SITE, correlation = corAR1(form = ~ YEAR | SITE),verbose = FALSE) +glmm_mod_fullyear <- glmmPQL(regional_gam ~ as.factor(YEAR) - 1, data = input, family = quasipoisson, random = ~ 1 | SITE, correlation = corAR1(form = ~ YEAR | SITE), verbose = FALSE) -col.index <- as.numeric(glmm.mod_fullyear$coefficients$fixed) +col_index <- as.numeric(glmm_mod_fullyear$coefficients$fixed) year <- unique(input$YEAR) -png('output-plot-trend.png') -plot(year,col.index, type='o',xlab="year",ylab="collated index") -abline(mod,lty=2,col="red") +png("output-plot-trend.png") +plot(year, col_index, type = "o", xlab = "year", ylab = "collated index") +abline(mod, lty = 2, col = "red") invisible(dev.off()) -d<-data.frame(year,col.index) -colnames(d)<-c("year","collated index") -write.table(d,"s_trend.tsv",sep=" ",row.names=FALSE) +d <- data.frame(year, col_index) +colnames(d) <- c("year", "collated index") +write.table(d, "s_trend.tsv", sep = " ", row.names = FALSE) cat("Show trend line on abundance plot")