From fdd43a7b159fd2f6df1706441493925131844a45 Mon Sep 17 00:00:00 2001
From: Brian Corrie
Date: Thu, 23 Feb 2023 17:47:55 -0800
Subject: [PATCH 001/221] Tapis v3 App shell script

---
 .../cell-celltypist-singularity/app3.sh | 344 ++++++++++++++++++
 1 file changed, 344 insertions(+)
 create mode 100755 resources/agave_apps/cell-celltypist-singularity/app3.sh

diff --git a/resources/agave_apps/cell-celltypist-singularity/app3.sh b/resources/agave_apps/cell-celltypist-singularity/app3.sh
new file mode 100755
index 00000000..39f688e4
--- /dev/null
+++ b/resources/agave_apps/cell-celltypist-singularity/app3.sh
@@ -0,0 +1,344 @@
#
# Wrapper script for running app through the iReceptor Gateway.
#

# Get the script directory where all the code is.
SCRIPT_DIR=`pwd`
SCRIPT_DIR=${_tapisExecSystemExecDir}
echo "IR-INFO: Running job from ${SCRIPT_DIR}"
ls /

########################################################################
# Tapis configuration/settings
########################################################################

#
# Tapis/AGAVE job variables.
#

AGAVE_JOB_ID=${AGAVE_JOB_ID}
AGAVE_JOB_NAME=${AGAVE_JOB_NAME}
AGAVE_LOG_NAME=${AGAVE_JOB_NAME}-${AGAVE_JOB_ID}
AGAVE_JOB_PROCESSORS_PER_NODE=${AGAVE_JOB_PROCESSORS_PER_NODE}
AGAVE_JOB_MEMORY_PER_NODE=${AGAVE_JOB_MEMORY_PER_NODE}

#
# Tapis App Parameters: Will be substituted by Tapis. If they don't exist,
# use command line arguments so we can test from the command line.
#

# Tapis parameter ir_gateway_url contains the URL of the source gateway. Use
# this to gather iReceptor Gateway specific resources if needed.
GATEWAY_URL="${ir_gateway_url}"

# We pass a singularity image to get from the Gateway. This image is provided
# on the Gateway because we only want to run singularity images that are approved
# by the gateway.
singularity_image="${singularity_image}"
echo "IR-INFO: Singularity image = ${singularity_image}"

#
# Tapis App Inputs
#

# Download file is a ZIP archive that is provided by the Gateway and contains
# the results of the user's query. This is the data that is being analyzed.
if [ -z "${download_file}" ]; then
    ZIP_FILE=$1
else
    ZIP_FILE=${download_file}
fi

# If you want to tell Tapis that the job failed
export JOB_ERROR=1

########################################################################
# Done Tapis setup/processing.
########################################################################

echo "IR-INFO: Using Gateway ${GATEWAY_URL}"


# Get the iReceptor Gateway utilities from the Gateway
echo -n "IR-INFO: Downloading iReceptor Gateway Utilities from the Gateway - "
date
GATEWAY_UTIL_DIR=${gateway_util_dir}
#mkdir -p ${GATEWAY_UTIL_DIR}
#pushd ${GATEWAY_UTIL_DIR} > /dev/null
#wget --no-verbose -r -nH --no-parent --cut-dir=1 --reject="index.html*" --reject="robots.txt*" ${GATEWAY_URL}/gateway_utilities/
#popd > /dev/null
#echo -n "IR-INFO: Done downloading iReceptor Gateway Utilities - "
#date
echo "IR-INFO: Using Gateway Utilities from ${GATEWAY_UTIL_DIR}"
#ln -s ${GATEWAY_UTIL_DIR}


# Load the iReceptor Gateway bash utility functions.
#source ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/gateway_utilities.sh
source ${GATEWAY_UTIL_DIR}/gateway_utilities.sh
if [ $? -ne 0 ]
then
    echo "IR-ERROR: Could not load GATEWAY UTILITIES"
    exit 1
fi

# This directory is defined in the gateway_utilities.sh. The Gateway
# relies on this being set. If it isn't set, abort as something has
# gone wrong with loading the Gateway utilities.
+echo "IR-INFO: Gateway analysis directory = ${GATEWAY_ANALYSIS_DIR}" +if [ -z "${GATEWAY_ANALYSIS_DIR}" ]; then + echo "IR-ERROR: GATEWAY_ANALYSIS_DIR not defined, gateway_utilities not loaded correctly." + exit 1 +fi +echo "IR-INFO: Done loading iReceptor Gateway Utilities" + +# Load any modules that are required by the App. +#module load singularity +#module load scipy-stack + +# Get the singularity image from the Gateway +#echo -n "IR-INFO: Downloading singularity image ${singularity_image} from the Gateway - " +#date +#gateway_get_singularity ${singularity_image} ${SCRIPT_DIR} +#echo -n "IR-INFO: Done ownloading singularity image from the Gateway - " +#date + +# The Gateway provides information about the download in the file info.txt +INFO_FILE="info.txt" +MANIFEST_FILE="AIRR-manifest.json" + +# Start +printf "IR-INFO:\nIR-INFO:\n" +printf "IR-INFO: START at $(date)\n" +printf "IR-INFO: PROCS = ${AGAVE_JOB_PROCESSORS_PER_NODE}\n" +printf "IR-INFO: MEM = ${AGAVE_JOB_MEMORY_PER_NODE}\n" +printf "IR-INFO: SLURM JOB ID = ${SLURM_JOB_ID}\n" +printf "IR-INFO: " +lscpu | grep "Model name" +printf "IR-INFO:\nIR-INFO:\n" + +# This function is called by the iReceptor Gateway utilities function gateway_split_repertoire +# The gateway utility function splits all data into repertoires and then calls this function +# for a single repertoire. As such, this function should perform all analysis required for a +# repertoire. +function run_analysis() +# Parameters: +# $1 output directory +# $2 repository name [string] +# $3 repertoire_id ("NULL" if should skip repertoire processing) +# $4 repertoire file (Not used if repertoire_id == NULL) +# $5 manifest file +# $6 analysis type +{ + # Use local variables - no scope issues please... + local output_directory=$1 + local repository_name=$2 + local repertoire_id=$3 + local repertoire_file=$4 + local manifest_file=$5 + local analysis_type=$6 + echo "IR-INFO: Running a Cell Repertoire Analysis with manifest ${manifest_file}" + + # Get a list of rearrangement files to process from the manifest. + local cell_files=( `python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/manifest_summary.py ${manifest_file} "cell_file"` ) + if [ $? -ne 0 ] + then + echo "IR-ERROR: Could not process manifest file ${manifest_file}" + return + fi + if [ ${#cell_files[@]} != 1 ] + then + echo "IR_ERROR: Celltypist cell analysis only works with a single cell file." + return + fi + local cell_file_count=${#cell_files[@]} + local cell_file=${cell_files[0]} + echo "IR-INFO: Using cell file ${cell_file}" + local gex_files=( `python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/manifest_summary.py ${manifest_file} "expression_file"` ) + if [ ${#gex_files[@]} != 1 ] + then + echo "IR_ERROR: CellTypist cell analysis only works with a single expression file." + return + fi + local gex_file=${gex_files[0]} + echo "IR-INFO: Using gex file ${gex_file}" + local rearrangement_files=( `python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/manifest_summary.py ${manifest_file} "rearrangement_file"` ) + if [ ${#rearrangement_files[@]} != 1 ] + then + echo "IR_ERROR: CellTypist cell analysis only works with a single rearrangement file." 
        return
    fi
    local rearrangement_file=${rearrangement_files[0]}
    echo "IR-INFO: Using rearrangement file ${rearrangement_files}"

    # Check to see if we are processing a specific repertoire_id
    if [ "${repertoire_id}" != "NULL" ]; then
        file_string=`python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/repertoire_summary.py ${repertoire_file} ${repertoire_id} --separator "_"`
        title_string="$(python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/repertoire_summary.py ${repertoire_file} ${repertoire_id})"
    else
        file_string="total"
        title_string="Total"
    fi

    # Clean up special characters in file and title strings.
    file_string=`echo ${repository_name}_${file_string} | sed "s/[!@#$%^&*() :/-]/_/g"`
    # TODO: Fix this, it should not be required.
    title_string=`echo ${title_string} | sed "s/[ ]//g"`

    # Run the CellTypist pipeline within the singularity image on each rearrangement file provided.
    echo "IR-INFO: Running CellTypist on $gex_file"
    echo "IR-INFO: Mapping ${PWD} to /data"
    echo "IR-INFO: Asking for ${AGAVE_JOB_PROCESSORS_PER_NODE} threads"
    echo "IR-INFO: Storing output in /data/${output_directory}"

    # log1p normalize the data for CellTypist
    echo -n "IR-INFO: log1p normalizing ${gex_file} - "
    date
    mv ${output_directory}/${gex_file} ${output_directory}/${gex_file}.tmp.h5ad
    singularity exec --cleanenv --env PYTHONNOUSERSITE=1 \
        -B ${output_directory}:/data -B ${SCRIPT_DIR}:/localsrc \
        ${SCRIPT_DIR}/${singularity_image} python \
        /localsrc/h5ad-log1pnormalize.py \
        /data/${gex_file}.tmp.h5ad \
        /data/${gex_file}
    if [ $? -ne 0 ]
    then
        echo "IR-ERROR: Could not log1p normalize the data"
        return
    fi

    # Run CellTypist using our internal code that produces slightly modified
    # graphs and output.
    echo -n "IR-INFO: Running CellTypist on ${gex_file} - "
    date
    singularity exec --cleanenv --env PYTHONNOUSERSITE=1 \
        -B ${output_directory}:/data -B ${SCRIPT_DIR}:/localsrc \
        ${SCRIPT_DIR}/${singularity_image} \
        python /localsrc/gateway-celltypist.py \
        /data/${gex_file} \
        /data \
        ${repertoire_id}-annotated.h5ad \
        ${title_string}
    if [ $? -ne 0 ]
    then
        echo "IR-ERROR: CellTypist failed on file ${repertoire_id}.h5ad"
        return
    fi
    echo -n "IR-INFO: Done running CellTypist on ${repertoire_id}.h5ad - "
    date

    # Copy the CellTypist summary report to the gateway expected summary for this repertoire
    echo "IR-INFO: Copying ${output_directory}/majority_voting_v2.pdf to ${output_directory}/${repertoire_id}.pdf"
    cp ${output_directory}/majority_voting_v2.pdf ${output_directory}/${repertoire_id}.pdf
    if [ $? -ne 0 ]
    then
        echo "IR-ERROR: Could not copy summary file ${output_directory}/majority_voting_v2.pdf"
    fi

    # Generate a summary HTML file for the Gateway to present this info to the user
    html_file=${output_directory}/${repertoire_id}.html

    # Generate the HTML main block
    printf '<!DOCTYPE HTML>\n' > ${html_file}
    printf '<html>' >> ${html_file}

    # Generate a normal looking iReceptor header
    printf '<head>\n' >> ${html_file}
    cat ${output_directory}/assets/head-template.html >> ${html_file}
    printf "<title>CellTypist: %s</title>\n" ${title_string} >> ${html_file}
    printf '</head>\n' >> ${html_file}

    # Generate an iReceptor top bar for the page
    cat ${output_directory}/assets/top-bar-template.html >> ${html_file}

    # Generate the main content container
    printf '<div class="container">' >> ${html_file}
    printf "<h2>CellTypist: %s</h2>\n" ${title_string} >> ${html_file}

    printf "<h2>Analysis</h2>\n" >> ${html_file}
    printf "<h3>Cell Typist, Majority Vote</h3>\n" >> ${html_file}
    printf "<h3>%s</h3>\n" ${title_string} >> ${html_file}
    printf '<object data="%s" type="application/pdf" width="100%%" height="700px"></object>\n' ${repertoire_id}.pdf >> ${html_file}

    # End of main div container
    printf '</div>' >> ${html_file}

    # Use the normal iReceptor footer.
    cat ${output_directory}/assets/footer.html >> ${html_file}

    # Generate the end of body and end of HTML
    printf '</body>' >> ${html_file}
    printf '</html>' >> ${html_file}

    # Add the required label file for the Gateway to present the results as a summary.
    label_file=${output_directory}/${repertoire_id}.txt
    echo "IR-INFO: Generating label file ${label_file}"
    echo "${title_string}" > ${label_file}
    if [ $? -ne 0 ]
    then
        echo "IR-ERROR: Could not generate label file ${label_file}"
    fi
    echo "IR-INFO: Done generating label file ${label_file}"

    # Remove the intermediate files generated for CellTypist
    rm -f ${output_directory}/${CONTIG_PREFIX}.csv ${output_directory}/${CONTIG_PREFIX}_*

    # We don't want to keep around the generated data files or the manifest file.
    rm -f ${output_directory}/${cell_file} ${output_directory}/${gex_file} ${output_directory}/${rearrangement_file} ${output_directory}/${manifest_file}

    # done
    printf "IR-INFO: Done running Repertoire Analysis on ${cell_file} at $(date)\n"
}

# Split the data by repertoire. This creates a directory tree in $GATEWAY_ANALYSIS_DIR
# with a directory per repository and within that a directory per repertoire in
# that repository. In each repertoire directory there will exist an AIRR manifest
# file and the data (as described in the manifest file) from that repertoire.
#
# The gateway utilities use a callback mechanism, calling the
# function run_analysis() on each repertoire. The run_analysis function
# is locally provided and should do all of the processing for a single
# repertoire.
#
# So the pipeline is:
# - Split the data into repertoire directories as described above
# - Run the analysis on each repertoire, calling run_analysis for each
# - Cleanup the intermediate files created by the split process.
# run_analysis() is defined above.
gateway_split_repertoire ${INFO_FILE} ${MANIFEST_FILE} ${ZIP_FILE} ${GATEWAY_ANALYSIS_DIR} "cell_file" ${SCRIPT_DIR}/${singularity_image}
gateway_run_analysis ${INFO_FILE} ${MANIFEST_FILE} ${GATEWAY_ANALYSIS_DIR} "cell_file"
gateway_cleanup ${ZIP_FILE} ${MANIFEST_FILE} ${GATEWAY_ANALYSIS_DIR}

# Make sure we are back where we started, although the gateway functions should
# not change the working directory that we are in.
cd ${SCRIPT_DIR}

# We want to copy the info.txt file to the main directory.
cp ${GATEWAY_ANALYSIS_DIR}/${INFO_FILE} .

# We want to keep the job error and output files as part of the analysis output.
cp *.err ${GATEWAY_ANALYSIS_DIR}
cp *.out ${GATEWAY_ANALYSIS_DIR}

# Zip up the analysis results for easy download
echo "IR-INFO: ZIPing analysis results - $(date)"
zip -r ${GATEWAY_ANALYSIS_DIR}.zip ${GATEWAY_ANALYSIS_DIR}
echo "IR-INFO: Done ZIPing analysis results - $(date)"

# We don't want the analysis files to remain - they are in the ZIP file
echo "IR-INFO: Removing analysis output"
rm -rf ${GATEWAY_ANALYSIS_DIR}

# We don't want to copy around the singularity image everywhere.
rm -f ${singularity_image}

# We don't want the iReceptor Utilities to be part of the results.
rm -rf ${GATEWAY_UTIL_DIR}

# Clean up the input data files, we don't want to return them as part of the resulting analysis
echo "IR-INFO: Removing original ZIP file $ZIP_FILE"
rm -f $ZIP_FILE

# End
printf "IR-INFO: DONE at $(date)\n\n"


From 483a05cd38a7c27748b133eb9825e077c3717425 Mon Sep 17 00:00:00 2001
From: Brian Corrie
Date: Thu, 23 Feb 2023 17:48:38 -0800
Subject: [PATCH 002/221] Updates for Tapis v3

---
 public/gateway_utilities/gateway_utilities.sh | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/public/gateway_utilities/gateway_utilities.sh b/public/gateway_utilities/gateway_utilities.sh
index 04a90c02..3fb0562c 100644
--- a/public/gateway_utilities/gateway_utilities.sh
+++ b/public/gateway_utilities/gateway_utilities.sh
@@ -350,14 +350,14 @@ function gateway_split_repertoire(){
     else
         ANALYSIS_TYPE=$5
     fi
-    local GATEWAY_SINGULARITY=gateway-singularity.sif
-    if [ -z "$6" ]; then
-        GATEWAY_SINGULARITY=gateway-singularity.sif
-    else
-        GATEWAY_SINGULARITY=$6
-    fi
     echo "GW-INFO: Analysis type = ${ANALYSIS_TYPE}"
-    echo "GW-INFO: Gateway singularity image = ${GATEWAY_SINGULARITY}"
+    #local GATEWAY_SINGULARITY=gateway-singularity.sif
+    #if [ -z "$6" ]; then
+    #    GATEWAY_SINGULARITY=gateway-singularity.sif
+    #else
+    #    GATEWAY_SINGULARITY=$6
+    #fi
+    #echo "GW-INFO: Gateway singularity image = ${GATEWAY_SINGULARITY}"

     # Unzip the iReceptor Gateway ZIP file into the working directory
     gateway_unzip ${ZIP_FILE} ${WORKING_DIR}

From 8272e06e8ffb5e2adb9a631eefc385cdc8000362 Mon Sep 17 00:00:00 2001
From: Brian Corrie
Date: Fri, 24 Feb 2023 12:41:37 -0800
Subject: [PATCH 003/221] Refactor to use gateway utility directory directly

---
 public/gateway_utilities/gateway_utilities.sh | 90 ++++++++++----------
 1 file changed, 48 insertions(+), 42 deletions(-)

diff --git a/public/gateway_utilities/gateway_utilities.sh b/public/gateway_utilities/gateway_utilities.sh
index 3fb0562c..d0214c06 100644
--- a/public/gateway_utilities/gateway_utilities.sh
+++ b/public/gateway_utilities/gateway_utilities.sh
@@ -81,7 +81,7 @@ function gateway_unzip() {
    echo "GW-INFO: Extracting files finished at: `date`"

    # Remove the copied ZIP file.
    #rm -f ${ZIP_FILE}

    # Go back to where we started
    popd > /dev/null
@@ -116,33 +116,33 @@ function gateway_cleanup(){
    # Determine the files that were extracted for the computation
    if [ ${ANALYSIS_TYPE} = "rearrangement_file" ]
    then
        data_files=( `python3 ${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} ${ANALYSIS_TYPE}` )
    elif [ ${ANALYSIS_TYPE} = "clone_file" ]
    then
        data_files=( `python3 ${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} ${ANALYSIS_TYPE}` )
    elif [ ${ANALYSIS_TYPE} = "cell_file" ]
    then
        # Cell analyses have three different types of files to process.
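        # As an illustration of the mechanism (the file name printed below is
        # hypothetical - the real names come from the user's download),
        # a call such as:
        #
        #   python3 ${GATEWAY_UTIL_DIR}/manifest_summary.py AIRR-manifest.json "cell_file"
        #
        # prints the manifest entries of the requested type, which the shell
        # then captures into the arrays below, one file name per element.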
- data_files=( `python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} ${ANALYSIS_TYPE}` ) - expression_files=( `python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} "expression_file"` ) - rearrangement_files=( `python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} "rearrangement_file"` ) + data_files=( `python3 ${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} ${ANALYSIS_TYPE}` ) + expression_files=( `python3 ${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} "expression_file"` ) + rearrangement_files=( `python3 ${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} "rearrangement_file"` ) fi # Clean up the files we created. First the ZIP file echo "GW-INFO: Removing ${ZIP_FILE}" - rm -f ${ZIP_FILE} + #rm -f ${ZIP_FILE} # Remove any data files extracted from the ZIP - they are big and can be re-generated for f in "${data_files[@]}"; do - rm -f $f + #rm -f $f echo "GW-INFO: Removing ${f}" done for f in "${expression_files[@]}"; do - rm -f $f + #rm -f $f echo "GW-INFO: Removing ${f}" done for f in "${rearrangement_files[@]}"; do - rm -f $f + #rm -f $f echo "GW-INFO: Removing ${f}" done echo "GW-INFO: Done cleaning up files" @@ -183,27 +183,27 @@ function gateway_run_analysis(){ pushd ${WORKING_DIR} > /dev/null # Get the repository from the manifest file. - repository_urls=( `python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} "repository_url"` ) + repository_urls=( `python3 ${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} "repository_url"` ) echo "GW-INFO: Repository URLs = ${repository_urls[@]}" # Get the Reperotire files from the manifest file. - repertoire_files=( `python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} "repertoire_file"` ) + repertoire_files=( `python3 ${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} "repertoire_file"` ) echo "GW-INFO: Repertoire files = ${repertoire_files[@]}" # Determine the files to process. We extract the data files from the AIRR-manifest.json # and store them in an array. The type is one of rearrangement_file, cell_file, clone_file if [ ${ANALYSIS_TYPE} = "rearrangement_file" ] then - data_files=( `python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} ${ANALYSIS_TYPE}` ) + data_files=( `python3 ${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} ${ANALYSIS_TYPE}` ) elif [ ${ANALYSIS_TYPE} = "clone_file" ] then - data_files=( `python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} ${ANALYSIS_TYPE}` ) + data_files=( `python3 ${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} ${ANALYSIS_TYPE}` ) elif [ ${ANALYSIS_TYPE} = "cell_file" ] then # Cell analyses have three different types of files to process. 
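        # For orientation, a sketch of the callback contract used further
        # down (parameter order follows the run_analysis() documentation in
        # the App scripts; the values are illustrative only):
        #
        #   run_analysis <output_dir> <repository_name> <repertoire_id> \
        #                <repertoire_file> <manifest_file> <analysis_type>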
- data_files=( `python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} ${ANALYSIS_TYPE}` ) - expression_files=( `python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} "expression_file"` ) - rearrangement_files=( `python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} "rearrangement_file"` ) + data_files=( `python3 ${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} ${ANALYSIS_TYPE}` ) + expression_files=( `python3 ${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} "expression_file"` ) + rearrangement_files=( `python3 ${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} "rearrangement_file"` ) fi # Check to make sure we have some data files to process in the manifest file. @@ -242,21 +242,21 @@ function gateway_run_analysis(){ # all of the reperotire_ids - we sort them to get unique ids and then # use awk to print them all on the same line to create an array # of repertoire_ids - repertoire_ids=( `python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/preprocess.py $data_file $SPLIT_FIELD | sort -u | awk '{printf("%s ",$0)}'` ) + repertoire_ids=( `python3 ${GATEWAY_UTIL_DIR}/preprocess.py $data_file $SPLIT_FIELD | sort -u | awk '{printf("%s ",$0)}'` ) elif [ ${ANALYSIS_TYPE} = "cell_file" ] then # preprocess-json.py dumps a field of interest from a JSON data file. We want # all of the reperotire_ids - we sort them to get unique ids and then # use awk to print them all on the same line to create an array # of repertoire_ids - repertoire_ids=( `python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/preprocess-json.py $data_file Cell $SPLIT_FIELD | sort -u | awk '{printf("%s ",$0)}'` ) + repertoire_ids=( `python3 ${GATEWAY_UTIL_DIR}/preprocess-json.py $data_file Cell $SPLIT_FIELD | sort -u | awk '{printf("%s ",$0)}'` ) elif [ ${ANALYSIS_TYPE} = "clone_file" ] then # preprocess-json.py dumps a field of interest from a JSON data file. We want # all of the reperotire_ids - we sort them to get unique ids and then # use awk to print them all on the same line to create an array # of repertoire_ids - repertoire_ids=( `python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/preprocess-json.py $data_file Clone $SPLIT_FIELD | sort -u | awk '{printf("%s ",$0)}'` ) + repertoire_ids=( `python3 ${GATEWAY_UTIL_DIR}/preprocess-json.py $data_file Clone $SPLIT_FIELD | sort -u | awk '{printf("%s ",$0)}'` ) else echo "GW-ERROR: Do not know how to split repertoires for ${ANALYSIS_TYPE}" exit 1 @@ -373,16 +373,16 @@ function gateway_split_repertoire(){ # and store them in an array. The type is one of rearrangement_file, cell_file, clone_file if [ ${ANALYSIS_TYPE} = "rearrangement_file" ] then - data_files=( `python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} ${ANALYSIS_TYPE}` ) + data_files=( `python3 ${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} ${ANALYSIS_TYPE}` ) elif [ ${ANALYSIS_TYPE} = "clone_file" ] then - data_files=( `python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} ${ANALYSIS_TYPE}` ) + data_files=( `python3 ${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} ${ANALYSIS_TYPE}` ) elif [ ${ANALYSIS_TYPE} = "cell_file" ] then # Cell analyses have three different types of files to process. 
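        # The split performed by this function produces a tree of roughly
        # this shape (repository and repertoire names are hypothetical):
        #
        #   ${WORKING_DIR}/
        #       <repository_name>/
        #           <repertoire_id>/
        #               AIRR-manifest.json
        #               assets/
        #               ... data files for this repertoire ...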
- data_files=( `python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} ${ANALYSIS_TYPE}` ) - expression_files=( `python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} "expression_file"` ) - rearrangement_files=( `python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} "rearrangement_file"` ) + data_files=( `python3 ${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} ${ANALYSIS_TYPE}` ) + expression_files=( `python3 ${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} "expression_file"` ) + rearrangement_files=( `python3 ${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} "rearrangement_file"` ) fi # Check to make sure we have some data files to process in the manifest file. @@ -393,11 +393,11 @@ function gateway_split_repertoire(){ fi # Get the repository from the manifest file. - repository_urls=( `python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} "repository_url"` ) + repository_urls=( `python3 ${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} "repository_url"` ) echo "GW-INFO: Repository URLs = ${repository_urls[@]}" # Get the Reperotire files from the manifest file. - repertoire_files=( `python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} "repertoire_file"` ) + repertoire_files=( `python3 ${GATEWAY_UTIL_DIR}/manifest_summary.py ${MANIFEST_FILE} "repertoire_file"` ) echo "GW-INFO: Repertoire files = ${repertoire_files[@]}" # For each repository, process the data from it. @@ -424,21 +424,21 @@ function gateway_split_repertoire(){ # all of the reperotire_ids - we sort them to get unique ids and then # use awk to print them all on the same line to create an array # of repertoire_ids - repertoire_ids=( `python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/preprocess.py $data_file $SPLIT_FIELD | sort -u | awk '{printf("%s ",$0)}'` ) + repertoire_ids=( `python3 ${GATEWAY_UTIL_DIR}/preprocess.py $data_file $SPLIT_FIELD | sort -u | awk '{printf("%s ",$0)}'` ) elif [ ${ANALYSIS_TYPE} = "cell_file" ] then # preprocess-json.py dumps a field of interest from a JSON data file. We want # all of the reperotire_ids - we sort them to get unique ids and then # use awk to print them all on the same line to create an array # of repertoire_ids - repertoire_ids=( `python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/preprocess-json.py $data_file Cell $SPLIT_FIELD | sort -u | awk '{printf("%s ",$0)}'` ) + repertoire_ids=( `python3 ${GATEWAY_UTIL_DIR}/preprocess-json.py $data_file Cell $SPLIT_FIELD | sort -u | awk '{printf("%s ",$0)}'` ) elif [ ${ANALYSIS_TYPE} = "clone_file" ] then # preprocess-json.py dumps a field of interest from a JSON data file. 
We want # all of the reperotire_ids - we sort them to get unique ids and then # use awk to print them all on the same line to create an array # of repertoire_ids - repertoire_ids=( `python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/preprocess-json.py $data_file Clone $SPLIT_FIELD | sort -u | awk '{printf("%s ",$0)}'` ) + repertoire_ids=( `python3 ${GATEWAY_UTIL_DIR}/preprocess-json.py $data_file Clone $SPLIT_FIELD | sort -u | awk '{printf("%s ",$0)}'` ) else echo "GW-ERROR: Do not know how to split repertoires for ${ANALYSIS_TYPE}" exit 1 @@ -470,7 +470,7 @@ function gateway_split_repertoire(){ # Copy the HTML resources for the Apps echo "GW-INFO: Copying HTML assets" mkdir -p ${repository_name}/${repertoire_dirname}/assets - cp -r ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/assets/* ${repository_name}/${repertoire_dirname}/assets + cp -r ${GATEWAY_UTIL_DIR}/assets/* ${repository_name}/${repertoire_dirname}/assets if [ $? -ne 0 ] then echo "GW-ERROR: Could not create HTML asset directory" @@ -504,7 +504,7 @@ function gateway_split_repertoire(){ # Filter the input file $data_file and extract all records that have the given # repertoire_id in the SPLIT_FIELD. # Command line parameters: inputfile, field_name, field_value, outfile - python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/filter-json.py $data_file Clone ${SPLIT_FIELD} ${repertoire_id} ${repository_name}/${repertoire_dirname}/${repertoire_datafile} + python3 ${GATEWAY_UTIL_DIR}/filter-json.py $data_file Clone ${SPLIT_FIELD} ${repertoire_id} ${repository_name}/${repertoire_dirname}/${repertoire_datafile} if [ $? -ne 0 ] then echo "GW-ERROR: Could not filter Clone data for ${repertoire_id} from ${data_file}" @@ -534,7 +534,7 @@ function gateway_split_repertoire(){ # repertoire_id in the SPLIT_FIELD. # Command line parameters: inputfile, field_name, field_value, outfile echo "GW-INFO: Splitting Cell file ${data_file} by ${SPLIT_FIELD} ${repertoire_id}" - python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/filter-json.py $data_file Cell ${SPLIT_FIELD} ${repertoire_id} ${repository_name}/${repertoire_dirname}/${cell_datafile} + python3 ${GATEWAY_UTIL_DIR}/filter-json.py $data_file Cell ${SPLIT_FIELD} ${repertoire_id} ${repository_name}/${repertoire_dirname}/${cell_datafile} if [ $? -ne 0 ] then echo "GW-ERROR: Could not filter Cell data for ${repertoire_id} from ${data_file}" @@ -549,7 +549,7 @@ function gateway_split_repertoire(){ # Handle the rearrangement files. # First we get a set of unique linking field IDs from the Cell file, all on one line, space separated. # We expect only one of these per repertoire. - link_ids=( `python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/preprocess-json.py ${repository_name}/${repertoire_dirname}/${cell_datafile} Cell ${LINK_FIELD} | sort -u | awk '{printf("%s ",$0)}'` ) + link_ids=( `python3 ${GATEWAY_UTIL_DIR}/preprocess-json.py ${repository_name}/${repertoire_dirname}/${cell_datafile} Cell ${LINK_FIELD} | sort -u | awk '{printf("%s ",$0)}'` ) if [ ${#link_ids[@]} != 1 ] then echo "GW-ERROR: Analysis expects a single ${LINK_FIELD} per Cell repertoire." 
@@ -564,7 +564,7 @@ function gateway_split_repertoire(){ echo "GW-INFO: Input file = ${rearrangement_file}" echo "GW-INFO: Output file = ${rearrangement_datafile}" echo "GW-INFO: Splitting Rearrangement file ${rearrangement_file} by ${LINK_FIELD} ${link_id}" - python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/filter.py $rearrangement_file ${LINK_FIELD} ${link_id} ${repository_name}/${repertoire_dirname}/${rearrangement_datafile} + python3 ${GATEWAY_UTIL_DIR}/filter.py $rearrangement_file ${LINK_FIELD} ${link_id} ${repository_name}/${repertoire_dirname}/${rearrangement_datafile} if [ $? -ne 0 ] then echo "GW-ERROR: Could not filter Rearrangement data for ${link_id} from ${rearrangement_file}" @@ -606,12 +606,18 @@ function gateway_split_repertoire(){ # # The code is run in a singularity container so that we can ensure # that all the requirements are present. - singularity exec --cleanenv --env PYTHONNOUSERSITE=1 \ - -B ${PWD}:/data -B ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}:/localsrc \ - ${GATEWAY_SINGULARITY} \ - python3 /localsrc/gateway-airr-to-h5ad.py \ - /data/${expression_file} \ - /data \ + #singularity exec --cleanenv --env PYTHONNOUSERSITE=1 \ + # -B ${PWD}:/data -B ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}:/localsrc \ + # ${GATEWAY_SINGULARITY} \ + # python3 /localsrc/gateway-airr-to-h5ad.py \ + # /data/${expression_file} \ + # /data \ + # 'CellExpression' \ + # ${SPLIT_FIELD} + + python3 ${GATEWAY_UTIL_DIR}/gateway-airr-to-h5ad.py \ + ${expression_file} \ + . \ 'CellExpression' \ ${SPLIT_FIELD} if [ $? -ne 0 ] @@ -685,7 +691,7 @@ function gateway_split_repertoire(){ done # Remove the temporary files/directories that remain. - rm -rf ${TMP_DIR} + #rm -rf ${TMP_DIR} echo -n "GW-INFO: Done splitting Rearrangement file ${data_file} - " date fi From ba5cdf90636e817a1ff1d6c442e7d751ad27407a Mon Sep 17 00:00:00 2001 From: Brian Corrie Date: Fri, 24 Feb 2023 15:24:50 -0800 Subject: [PATCH 004/221] New files for processing These should actually be in the analysis app container but for now they will live here. --- .../gateway_utilities/gateway-celltypist.py | 99 +++++++++++++++++++ .../gateway_utilities/h5ad-log1pnormalize.py | 66 +++++++++++++ 2 files changed, 165 insertions(+) create mode 100644 public/gateway_utilities/gateway-celltypist.py create mode 100644 public/gateway_utilities/h5ad-log1pnormalize.py diff --git a/public/gateway_utilities/gateway-celltypist.py b/public/gateway_utilities/gateway-celltypist.py new file mode 100644 index 00000000..77317d62 --- /dev/null +++ b/public/gateway_utilities/gateway-celltypist.py @@ -0,0 +1,99 @@ +import sys +import os +import argparse + +from matplotlib import pyplot +import scanpy +import numpy +import celltypist +from celltypist import models + +def getArguments(): + # Set up the command line parser + parser = argparse.ArgumentParser( + formatter_class=argparse.RawDescriptionHelpFormatter, + description="" + ) + + # The filename (with or without full path) for the input file. + parser.add_argument("input_file", help="The input Anndata file. This is expected to be normalized to a total of 10000 counts per cell and logarithmically scaled") + + # The output directory (full or relative path) for output + parser.add_argument("output_directory", help="The directory where the plotting and report files are saved.") + + # The output file (filename only) for the h5ad file. + parser.add_argument("output_file", help="The Anndata output filename, will be written to output_directory") + + # The output file (filename only) for the h5ad file. 
+ parser.add_argument("title", help="The title to use for the figures.") + + # Handle verbose processing + parser.add_argument( + "-v", + "--verbose", + action="store_true", + help="Run the program in verbose mode. This option will generate debug output.") + + + # Parse the command line arguements. + options = parser.parse_args() + return options + +if __name__ == "__main__": + # Get the command line arguments. + options = getArguments() + + # Output some cell typist info about the models + print("IR-INFO: CellTypist data models:") + print(models.models_description(), flush=True) + + # Load in a default model + model = models.Model.load(model = 'Immune_All_Low.pkl') + print("IR-INFO: Default model info:") + print(model) + print(model.cell_types, flush=True) + + # Read in the h5ad file + print("IR-INFO: Reading Cell file " + options.input_file, flush=True) + adata = scanpy.read(options.input_file) + + # We turn on the majority-voting classifier (majority_voting = True), which refines + # cell identities within local subclusters after an over-clustering approach at the + # cost of increased runtime. + # + # The results include both predicted cell type labels (predicted_labels), + # over-clustering result (over_clustering), and predicted labels after majority voting + # in local subclusters (majority_voting). Note in the predicted_labels, each query cell + # gets its inferred label by choosing the most probable cell type among all possible + # cell types in the given model. + predictions = celltypist.annotate(adata, model = 'Immune_All_Low.pkl', majority_voting = True) + + # Get an `AnnData` with predicted labels embedded into the cell metadata columns. + prediction_adata = predictions.to_adata() + print("IR-INFO: Prediction observations:") + print(prediction_adata.obs, flush=True) + print(prediction_adata.to_df(), flush=True) + + + # Export figures with labels external (celltypist plots are messy) + scanpy.tl.umap(prediction_adata) + scanpy.pl.umap(prediction_adata, color = ['predicted_labels'], title = options.title + ' (majority)') + pyplot.tight_layout() + pyplot.savefig(os.path.join(options.output_directory, 'predicted_labels_v2' + '.pdf')) + scanpy.pl.umap(prediction_adata, color = ['majority_voting'], title = 'CellTypist (majority vote)') + pyplot.tight_layout() + pyplot.savefig(os.path.join(options.output_directory, 'majority_voting_v2' + '.pdf')) + + # Write output + predictions.to_table(folder = options.output_directory, prefix="", xlsx = True) + + # Plot results + predictions.to_plots(folder = options.output_directory, plot_probability = True) + + # Write the h5ad file + print("IR-INFO: Writing output to " + options.output_directory + "/" + options.output_file, flush=True) + prediction_adata.write(options.output_directory + "/" + options.output_file) + + # Done + sys.exit(0) + diff --git a/public/gateway_utilities/h5ad-log1pnormalize.py b/public/gateway_utilities/h5ad-log1pnormalize.py new file mode 100644 index 00000000..257c6825 --- /dev/null +++ b/public/gateway_utilities/h5ad-log1pnormalize.py @@ -0,0 +1,66 @@ +import string +import sys +import argparse +import json +import time +import numpy +import scanpy + +def getArguments(): + # Set up the command line parser + parser = argparse.ArgumentParser( + formatter_class=argparse.RawDescriptionHelpFormatter, + description="" + ) + parser = argparse.ArgumentParser() + + # The h5ad file to process + parser.add_argument("input_file") + # The h5ad file to write + parser.add_argument("output_file") + + # Request normaliztion of the 
counts for each cell to the given value. + parser.add_argument( + "--normalize_value", + dest="normalize_value", + type=float, + default=10000.0, + help="Request each cell count to be normalized to this value") + + # Parse the command line arguements. + options = parser.parse_args() + return options + +if __name__ == "__main__": + total_start = time.perf_counter() + # Get the command line arguments. + options = getArguments() + + print("IR-INFO: Reading h5ad file " + options.input_file, flush=True) + adata = scanpy.read(options.input_file) + + # Return success if successful + if adata is None: + print('ERROR: Unable to process h5adfile %s'%(options.input_file)) + sys.exit(1) + + # If normalization requested, normalize. Default normalize to 1. + print("IR-INFO: Normalizing cell counts to %f"%(options.normalize_value)) + scanpy.pp.normalize_total(adata, target_sum=options.normalize_value) + + # If log scaling requested, do it. + print("IR-INFO: Performing log scaling" ) + scanpy.pp.log1p(adata) + # scanpy has a bug, doesn't save "None" data into files. + # log1p sets base = None, so we want to change it toe numpy.e + adata.uns["log1p"]["base"]=numpy.e + + # Write the output to the output file. + print("IR-INFO: Writing file %s"%(options.output_file)) + adata.write(options.output_file) + print("IR-INFO: Done writing file %s"%(options.output_file)) + print(adata.to_df()) + total_end = time.perf_counter() + print('IR-INFO: Total time = %d s'%(total_end-total_start)) + + sys.exit(0) From 330f7d4479c7ec023d9b9cf7c22602f010ca26da Mon Sep 17 00:00:00 2001 From: Brian Corrie Date: Fri, 24 Feb 2023 15:26:26 -0800 Subject: [PATCH 005/221] Refactor for Tapis 3 Changes to use directories differently, no longer run jobs with singularity, the entire app is run in singularity. --- public/gateway_utilities/gateway_utilities.sh | 3 +- .../cell-celltypist-singularity/app3.sh | 54 +++++++++++-------- 2 files changed, 32 insertions(+), 25 deletions(-) diff --git a/public/gateway_utilities/gateway_utilities.sh b/public/gateway_utilities/gateway_utilities.sh index d0214c06..db7979cf 100644 --- a/public/gateway_utilities/gateway_utilities.sh +++ b/public/gateway_utilities/gateway_utilities.sh @@ -598,7 +598,6 @@ function gateway_split_repertoire(){ if [ ${ANALYSIS_TYPE} = "cell_file" ] then echo "GW-INFO: Splitting Expression file ${expression_file} by ${SPLIT_FIELD}" - echo "GW-INFO: Using ${GATEWAY_SINGULARITY} to split the file." # Split the GEX input file into N files one per repertoire, converting the # data from JSON to h5ad for downstream processing. Output goes in the @@ -615,7 +614,7 @@ function gateway_split_repertoire(){ # 'CellExpression' \ # ${SPLIT_FIELD} - python3 ${GATEWAY_UTIL_DIR}/gateway-airr-to-h5ad.py \ + python3 ${GATEWAY_UTIL_DIR}/gateway-airr-to-h5ad.py -v \ ${expression_file} \ . \ 'CellExpression' \ diff --git a/resources/agave_apps/cell-celltypist-singularity/app3.sh b/resources/agave_apps/cell-celltypist-singularity/app3.sh index 39f688e4..0a8adfcc 100755 --- a/resources/agave_apps/cell-celltypist-singularity/app3.sh +++ b/resources/agave_apps/cell-celltypist-singularity/app3.sh @@ -140,7 +140,7 @@ function run_analysis() echo "IR-INFO: Running a Cell Repertoire Analysis with manifest ${manifest_file}" # Get a list of rearrangement files to process from the manifest. 
- local cell_files=( `python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/manifest_summary.py ${manifest_file} "cell_file"` ) + local cell_files=( `python3 ${GATEWAY_UTIL_DIR}/manifest_summary.py ${manifest_file} "cell_file"` ) if [ $? -ne 0 ] then echo "IR-ERROR: Could not process manifest file ${manifest_file}" @@ -154,7 +154,7 @@ function run_analysis() local cell_file_count=${#cell_files[@]} local cell_file=${cell_files[0]} echo "IR-INFO: Using cell file ${cell_file}" - local gex_files=( `python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/manifest_summary.py ${manifest_file} "expression_file"` ) + local gex_files=( `python3 ${GATEWAY_UTIL_DIR}/manifest_summary.py ${manifest_file} "expression_file"` ) if [ ${#gex_files[@]} != 1 ] then echo "IR_ERROR: CellTypist cell analysis only works with a single expression file." @@ -162,7 +162,7 @@ function run_analysis() fi local gex_file=${gex_files[0]} echo "IR-INFO: Using gex file ${gex_file}" - local rearrangement_files=( `python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/manifest_summary.py ${manifest_file} "rearrangement_file"` ) + local rearrangement_files=( `python3 ${GATEWAY_UTIL_DIR}/manifest_summary.py ${manifest_file} "rearrangement_file"` ) if [ ${#rearrangement_files[@]} != 1 ] then echo "IR_ERROR: CellTypist cell analysis only works with a single rearrangement file." @@ -173,8 +173,8 @@ function run_analysis() # Check to see if we are processing a specific repertoire_id if [ "${repertoire_id}" != "NULL" ]; then - file_string=`python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/repertoire_summary.py ${repertoire_file} ${repertoire_id} --separator "_"` - title_string="$(python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/repertoire_summary.py ${repertoire_file} ${repertoire_id})" + file_string=`python3 ${GATEWAY_UTIL_DIR}/repertoire_summary.py ${repertoire_file} ${repertoire_id} --separator "_"` + title_string="$(python3 ${GATEWAY_UTIL_DIR}/repertoire_summary.py ${repertoire_file} ${repertoire_id})" else file_string="total" title_string="Total" @@ -195,12 +195,15 @@ function run_analysis() echo -n "IR-INFO: log1p normalizing ${gex_file} - " date mv ${output_directory}/${gex_file} ${output_directory}/${gex_file}.tmp.h5ad - singularity exec --cleanenv --env PYTHONNOUSERSITE=1 \ - -B ${output_directory}:/data -B ${SCRIPT_DIR}:/localsrc \ - ${SCRIPT_DIR}/${singularity_image} python \ - /localsrc/h5ad-log1pnormalize.py\ - /data/${gex_file}.tmp.h5ad \ - /data/${gex_file} + #singularity exec --cleanenv --env PYTHONNOUSERSITE=1 \ + # -B ${output_directory}:/data -B ${SCRIPT_DIR}:/localsrc \ + # ${SCRIPT_DIR}/${singularity_image} python \ + # /localsrc/h5ad-log1pnormalize.py\ + # /data/${gex_file}.tmp.h5ad \ + # /data/${gex_file} + python3 ${GATEWAY_UTIL_DIR}/h5ad-log1pnormalize.py\ + ${output_directory}/${gex_file}.tmp.h5ad \ + ${output_directory}/${gex_file} if [ $? -ne 0 ] then echo "IR-ERROR: Could not log1p normalize the data" @@ -211,12 +214,17 @@ function run_analysis() # graphs and output. 
echo -n "IR-INFO: Running CellTpist on ${gex_file} - " date - singularity exec --cleanenv --env PYTHONNOUSERSITE=1 \ - -B ${output_directory}:/data -B ${SCRIPT_DIR}:/localsrc \ - ${SCRIPT_DIR}/${singularity_image} \ - python /localsrc/gateway-celltypist.py \ - /data/${gex_file} \ - /data \ + #singularity exec --cleanenv --env PYTHONNOUSERSITE=1 \ + # -B ${output_directory}:/data -B ${SCRIPT_DIR}:/localsrc \ + # ${SCRIPT_DIR}/${singularity_image} \ + # python /localsrc/gateway-celltypist.py \ + # /data/${gex_file} \ + # /data \ + # ${repertoire_id}-annotated.h5ad \ + # ${title_string} + python3 ${GATEWAY_UTIL_DIR}/gateway-celltypist.py \ + ${output_directory}/${gex_file} \ + ${output_directory} \ ${repertoire_id}-annotated.h5ad \ ${title_string} if [ $? -ne 0 ] @@ -281,10 +289,10 @@ function run_analysis() echo "IR-INFO: Done generating label file ${label_file}" # Remove the intermediate files generated for CellTypist - rm -f ${output_directory}/${CONTIG_PREFIX}.csv ${output_directory}/${CONTIG_PREFIX}_* + #rm -f ${output_directory}/${CONTIG_PREFIX}.csv ${output_directory}/${CONTIG_PREFIX}_* # We don't want to keep around the generated data files or the manifest file. - rm -f ${output_directory}/${cell_file} ${output_directory}/${gex_file} ${output_directory}/${rearrangement_file} ${output_directory}/${manifest_file} + #rm -f ${output_directory}/${cell_file} ${output_directory}/${gex_file} ${output_directory}/${rearrangement_file} ${output_directory}/${manifest_file} # done printf "IR-INFO: Done running Repertoire Analysis on ${cell_file} at $(date)\n" @@ -327,17 +335,17 @@ echo "IR-INFO: Done ZIPing analysis results - $(date)" # We don't want the analysis files to remain - they are in the ZIP file echo "IR-INFO: Removing analysis output" -rm -rf ${GATEWAY_ANALYSIS_DIR} +#rm -rf ${GATEWAY_ANALYSIS_DIR} # We don't want to copy around the singularity image everywhere. -rm -f ${singularity_image} +#rm -f ${singularity_image} # We don't want the iReceptor Utilities to be part of the results. -rm -rf ${GATEWAY_UTIL_DIR} +#rm -rf ${GATEWAY_UTIL_DIR} # Cleanup the input data files, don't want to return them as part of the resulting analysis echo "IR-INFO: Removing original ZIP file $ZIP_FILE" -rm -f $ZIP_FILE +#rm -f $ZIP_FILE # End printf "IR-INFO: DONE at $(date)\n\n" From ba082772378451b3924ba8ff5ef37729cb02aee3 Mon Sep 17 00:00:00 2001 From: Brian Corrie Date: Mon, 27 Feb 2023 16:34:09 -0800 Subject: [PATCH 006/221] Use App code within the container Custom singularity container now contains the custom python code that is required for processing. No longer need to use external code for the App (other than the gateway utility code). --- resources/agave_apps/cell-celltypist-singularity/app3.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/resources/agave_apps/cell-celltypist-singularity/app3.sh b/resources/agave_apps/cell-celltypist-singularity/app3.sh index 0a8adfcc..0f7145cd 100755 --- a/resources/agave_apps/cell-celltypist-singularity/app3.sh +++ b/resources/agave_apps/cell-celltypist-singularity/app3.sh @@ -201,7 +201,8 @@ function run_analysis() # /localsrc/h5ad-log1pnormalize.py\ # /data/${gex_file}.tmp.h5ad \ # /data/${gex_file} - python3 ${GATEWAY_UTIL_DIR}/h5ad-log1pnormalize.py\ + ###python3 ${GATEWAY_UTIL_DIR}/h5ad-log1pnormalize.py\ + python3 /opt/ireceptor/h5ad-log1pnormalize.py\ ${output_directory}/${gex_file}.tmp.h5ad \ ${output_directory}/${gex_file} if [ $? 
-ne 0 ] @@ -222,7 +223,8 @@ function run_analysis() # /data \ # ${repertoire_id}-annotated.h5ad \ # ${title_string} - python3 ${GATEWAY_UTIL_DIR}/gateway-celltypist.py \ + ### python3 ${GATEWAY_UTIL_DIR}/gateway-celltypist.py \ + python3 /opt/ireceptor/gateway-celltypist.py \ ${output_directory}/${gex_file} \ ${output_directory} \ ${repertoire_id}-annotated.h5ad \ From 8bbf5f0efcd4aa6a9d285858ffb3015e47e6ea33 Mon Sep 17 00:00:00 2001 From: Brian Corrie Date: Mon, 27 Feb 2023 16:45:04 -0800 Subject: [PATCH 007/221] Removed celltypist specific code. --- .../gateway_utilities/gateway-celltypist.py | 99 ------------------- .../gateway_utilities/h5ad-log1pnormalize.py | 66 ------------- 2 files changed, 165 deletions(-) delete mode 100644 public/gateway_utilities/gateway-celltypist.py delete mode 100644 public/gateway_utilities/h5ad-log1pnormalize.py diff --git a/public/gateway_utilities/gateway-celltypist.py b/public/gateway_utilities/gateway-celltypist.py deleted file mode 100644 index 77317d62..00000000 --- a/public/gateway_utilities/gateway-celltypist.py +++ /dev/null @@ -1,99 +0,0 @@ -import sys -import os -import argparse - -from matplotlib import pyplot -import scanpy -import numpy -import celltypist -from celltypist import models - -def getArguments(): - # Set up the command line parser - parser = argparse.ArgumentParser( - formatter_class=argparse.RawDescriptionHelpFormatter, - description="" - ) - - # The filename (with or without full path) for the input file. - parser.add_argument("input_file", help="The input Anndata file. This is expected to be normalized to a total of 10000 counts per cell and logarithmically scaled") - - # The output directory (full or relative path) for output - parser.add_argument("output_directory", help="The directory where the plotting and report files are saved.") - - # The output file (filename only) for the h5ad file. - parser.add_argument("output_file", help="The Anndata output filename, will be written to output_directory") - - # The output file (filename only) for the h5ad file. - parser.add_argument("title", help="The title to use for the figures.") - - # Handle verbose processing - parser.add_argument( - "-v", - "--verbose", - action="store_true", - help="Run the program in verbose mode. This option will generate debug output.") - - - # Parse the command line arguements. - options = parser.parse_args() - return options - -if __name__ == "__main__": - # Get the command line arguments. - options = getArguments() - - # Output some cell typist info about the models - print("IR-INFO: CellTypist data models:") - print(models.models_description(), flush=True) - - # Load in a default model - model = models.Model.load(model = 'Immune_All_Low.pkl') - print("IR-INFO: Default model info:") - print(model) - print(model.cell_types, flush=True) - - # Read in the h5ad file - print("IR-INFO: Reading Cell file " + options.input_file, flush=True) - adata = scanpy.read(options.input_file) - - # We turn on the majority-voting classifier (majority_voting = True), which refines - # cell identities within local subclusters after an over-clustering approach at the - # cost of increased runtime. - # - # The results include both predicted cell type labels (predicted_labels), - # over-clustering result (over_clustering), and predicted labels after majority voting - # in local subclusters (majority_voting). Note in the predicted_labels, each query cell - # gets its inferred label by choosing the most probable cell type among all possible - # cell types in the given model. 
- predictions = celltypist.annotate(adata, model = 'Immune_All_Low.pkl', majority_voting = True) - - # Get an `AnnData` with predicted labels embedded into the cell metadata columns. - prediction_adata = predictions.to_adata() - print("IR-INFO: Prediction observations:") - print(prediction_adata.obs, flush=True) - print(prediction_adata.to_df(), flush=True) - - - # Export figures with labels external (celltypist plots are messy) - scanpy.tl.umap(prediction_adata) - scanpy.pl.umap(prediction_adata, color = ['predicted_labels'], title = options.title + ' (majority)') - pyplot.tight_layout() - pyplot.savefig(os.path.join(options.output_directory, 'predicted_labels_v2' + '.pdf')) - scanpy.pl.umap(prediction_adata, color = ['majority_voting'], title = 'CellTypist (majority vote)') - pyplot.tight_layout() - pyplot.savefig(os.path.join(options.output_directory, 'majority_voting_v2' + '.pdf')) - - # Write output - predictions.to_table(folder = options.output_directory, prefix="", xlsx = True) - - # Plot results - predictions.to_plots(folder = options.output_directory, plot_probability = True) - - # Write the h5ad file - print("IR-INFO: Writing output to " + options.output_directory + "/" + options.output_file, flush=True) - prediction_adata.write(options.output_directory + "/" + options.output_file) - - # Done - sys.exit(0) - diff --git a/public/gateway_utilities/h5ad-log1pnormalize.py b/public/gateway_utilities/h5ad-log1pnormalize.py deleted file mode 100644 index 257c6825..00000000 --- a/public/gateway_utilities/h5ad-log1pnormalize.py +++ /dev/null @@ -1,66 +0,0 @@ -import string -import sys -import argparse -import json -import time -import numpy -import scanpy - -def getArguments(): - # Set up the command line parser - parser = argparse.ArgumentParser( - formatter_class=argparse.RawDescriptionHelpFormatter, - description="" - ) - parser = argparse.ArgumentParser() - - # The h5ad file to process - parser.add_argument("input_file") - # The h5ad file to write - parser.add_argument("output_file") - - # Request normaliztion of the counts for each cell to the given value. - parser.add_argument( - "--normalize_value", - dest="normalize_value", - type=float, - default=10000.0, - help="Request each cell count to be normalized to this value") - - # Parse the command line arguements. - options = parser.parse_args() - return options - -if __name__ == "__main__": - total_start = time.perf_counter() - # Get the command line arguments. - options = getArguments() - - print("IR-INFO: Reading h5ad file " + options.input_file, flush=True) - adata = scanpy.read(options.input_file) - - # Return success if successful - if adata is None: - print('ERROR: Unable to process h5adfile %s'%(options.input_file)) - sys.exit(1) - - # If normalization requested, normalize. Default normalize to 1. - print("IR-INFO: Normalizing cell counts to %f"%(options.normalize_value)) - scanpy.pp.normalize_total(adata, target_sum=options.normalize_value) - - # If log scaling requested, do it. - print("IR-INFO: Performing log scaling" ) - scanpy.pp.log1p(adata) - # scanpy has a bug, doesn't save "None" data into files. - # log1p sets base = None, so we want to change it toe numpy.e - adata.uns["log1p"]["base"]=numpy.e - - # Write the output to the output file. 
- print("IR-INFO: Writing file %s"%(options.output_file)) - adata.write(options.output_file) - print("IR-INFO: Done writing file %s"%(options.output_file)) - print(adata.to_df()) - total_end = time.perf_counter() - print('IR-INFO: Total time = %d s'%(total_end-total_start)) - - sys.exit(0) From d9801c64822305581cb8246b1bb3a6ef8a415c90 Mon Sep 17 00:00:00 2001 From: bcorrie Date: Tue, 28 Feb 2023 01:03:24 +0000 Subject: [PATCH 008/221] Tapis3 singularity cleanup Tapis3 all code is run inside the singularity container, so we no longer need to specify a container image. --- public/gateway_utilities/gateway_utilities.sh | 24 +------------------ 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/public/gateway_utilities/gateway_utilities.sh b/public/gateway_utilities/gateway_utilities.sh index db7979cf..94682b2a 100644 --- a/public/gateway_utilities/gateway_utilities.sh +++ b/public/gateway_utilities/gateway_utilities.sh @@ -330,7 +330,6 @@ function gateway_split_repertoire(){ # $3 - iReceptor ZIP file # $4 - Working directory # $5 - Type of analysis from the manifest (rearrangement_file, clone_file, cell_file) -# $6 - Singularity image to use echo "GW-INFO: ========================================" echo -n "GW-INFO: Splitting AIRR Repertoires at " @@ -351,13 +350,6 @@ function gateway_split_repertoire(){ ANALYSIS_TYPE=$5 fi echo "GW-INFO: Analysis type = ${ANALYSIS_TYPE}" - #local GATEWAY_SINGULARITY=gateway-singularity.sif - #if [ -z "$6" ]; then - # GATEWAY_SINGULARITY=gateway-singularity.sif - #else - # GATEWAY_SINGULARITY=$6 - #fi - #echo "GW-INFO: Gateway singularity image = ${GATEWAY_SINGULARITY}" # Unzip the iReceptor Gateway ZIP file into the working directory gateway_unzip ${ZIP_FILE} ${WORKING_DIR} @@ -603,22 +595,8 @@ function gateway_split_repertoire(){ # data from JSON to h5ad for downstream processing. Output goes in the # current directory. Output files are named $repertoire_id.h5ad. # - # The code is run in a singularity container so that we can ensure - # that all the requirements are present. - #singularity exec --cleanenv --env PYTHONNOUSERSITE=1 \ - # -B ${PWD}:/data -B ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}:/localsrc \ - # ${GATEWAY_SINGULARITY} \ - # python3 /localsrc/gateway-airr-to-h5ad.py \ - # /data/${expression_file} \ - # /data \ - # 'CellExpression' \ - # ${SPLIT_FIELD} - python3 ${GATEWAY_UTIL_DIR}/gateway-airr-to-h5ad.py -v \ - ${expression_file} \ - . \ - 'CellExpression' \ - ${SPLIT_FIELD} + ${expression_file} . 'CellExpression' ${SPLIT_FIELD} if [ $? -ne 0 ] then echo "GW-ERROR: Cell split failed on file ${expression_file}" From 42850a031f8e59ac57efd900df4e92893b5d9e57 Mon Sep 17 00:00:00 2001 From: Brian Corrie Date: Mon, 27 Feb 2023 17:06:12 -0800 Subject: [PATCH 009/221] Clean up celltypist shell script Clean up singularity use - no longer needs to be explicit since everything runs in the container. Also clean up the Gateway utility directory, as it is more basic and clean now as well. It is provided on the compute machine and the app is told where it is with a parameter from the Gateway. 
--- .../cell-celltypist-singularity/app3.sh | 57 ++----------------- 1 file changed, 4 insertions(+), 53 deletions(-) diff --git a/resources/agave_apps/cell-celltypist-singularity/app3.sh b/resources/agave_apps/cell-celltypist-singularity/app3.sh index 0f7145cd..5a492732 100755 --- a/resources/agave_apps/cell-celltypist-singularity/app3.sh +++ b/resources/agave_apps/cell-celltypist-singularity/app3.sh @@ -31,12 +31,6 @@ AGAVE_JOB_MEMORY_PER_NODE=${AGAVE_JOB_MEMORY_PER_NODE} # this to gather iReceptor Gateway specific resources if needed. GATEWAY_URL="${ir_gateway_url}" -# We pass a singularity image to get from the Gateway. This image is provided -# on the Gateway because we only want to run singularity images that are approved -# by the gateway. -singularity_image="${singularity_image}" -echo "IR-INFO: Singularity image = ${singularity_image}" - # # Tapis App Inputs # @@ -55,26 +49,13 @@ export JOB_ERROR=1 ######################################################################## # Done Tapis setup/processing. ######################################################################## - echo "IR-INFO: Using Gateway ${GATEWAY_URL}" - -# Get the iRecpetor Gateway utilities from the Gateway -echo -n "IR-INFO: Downloading iReceptor Gateway Utilities from the Gateway - " -date +# Report where we get the Gateway utilities from GATEWAY_UTIL_DIR=${gateway_util_dir} -#mkdir -p ${GATEWAY_UTIL_DIR} -#pushd ${GATEWAY_UTIL_DIR} > /dev/null -#wget --no-verbose -r -nH --no-parent --cut-dir=1 --reject="index.html*" --reject="robots.txt*" ${GATEWAY_URL}/gateway_utilities/ -#popd > /dev/null -#echo -n "IR-INFO: Done downloading iReceptor Gateway Utilities - " -#date -echo "IR-INFO: Using Gateway Utilities from ${GATEWAY_UTIL_DIR}" -#ln -s ${GATEWAY_UTIL_DIR} - +echo "IR-INFO: Using iReceptor Gateway Utilities from ${GATEWAY_UTIL_DIR}" # Load the iReceptor Gateway bash utility functions. -#source ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/gateway_utilities.sh source ${GATEWAY_UTIL_DIR}/gateway_utilities.sh if [ $? -ne 0 ] then @@ -92,17 +73,6 @@ if [ -z "${GATEWAY_ANALYSIS_DIR}" ]; then fi echo "IR-INFO: Done loading iReceptor Gateway Utilities" -# Load any modules that are required by the App. -#module load singularity -#module load scipy-stack - -# Get the singularity image from the Gateway -#echo -n "IR-INFO: Downloading singularity image ${singularity_image} from the Gateway - " -#date -#gateway_get_singularity ${singularity_image} ${SCRIPT_DIR} -#echo -n "IR-INFO: Done ownloading singularity image from the Gateway - " -#date - # The Gateway provides information about the download in the file info.txt INFO_FILE="info.txt" MANIFEST_FILE="AIRR-manifest.json" @@ -185,7 +155,7 @@ function run_analysis() # TODO: Fix this, it should not be required. title_string=`echo ${title_string} | sed "s/[ ]//g"` - # Run the CellTypist pipeline within the singularity image on each rearrangement file provided. + # Run the CellTypist pipeline on each rearrangement file provided. 
echo "IR-INFO: Running CellTypist on $gex_file" echo "IR-INFO: Mapping ${PWD} to /data" echo "IR-INFO: Asking for ${AGAVE_JOB_PROCESSORS_PER_NODE} threads" @@ -195,13 +165,6 @@ function run_analysis() echo -n "IR-INFO: log1p normalizing ${gex_file} - " date mv ${output_directory}/${gex_file} ${output_directory}/${gex_file}.tmp.h5ad - #singularity exec --cleanenv --env PYTHONNOUSERSITE=1 \ - # -B ${output_directory}:/data -B ${SCRIPT_DIR}:/localsrc \ - # ${SCRIPT_DIR}/${singularity_image} python \ - # /localsrc/h5ad-log1pnormalize.py\ - # /data/${gex_file}.tmp.h5ad \ - # /data/${gex_file} - ###python3 ${GATEWAY_UTIL_DIR}/h5ad-log1pnormalize.py\ python3 /opt/ireceptor/h5ad-log1pnormalize.py\ ${output_directory}/${gex_file}.tmp.h5ad \ ${output_directory}/${gex_file} @@ -215,15 +178,6 @@ function run_analysis() # graphs and output. echo -n "IR-INFO: Running CellTpist on ${gex_file} - " date - #singularity exec --cleanenv --env PYTHONNOUSERSITE=1 \ - # -B ${output_directory}:/data -B ${SCRIPT_DIR}:/localsrc \ - # ${SCRIPT_DIR}/${singularity_image} \ - # python /localsrc/gateway-celltypist.py \ - # /data/${gex_file} \ - # /data \ - # ${repertoire_id}-annotated.h5ad \ - # ${title_string} - ### python3 ${GATEWAY_UTIL_DIR}/gateway-celltypist.py \ python3 /opt/ireceptor/gateway-celltypist.py \ ${output_directory}/${gex_file} \ ${output_directory} \ @@ -315,7 +269,7 @@ function run_analysis() # - Run the analysis on each repertoire, calling run_analysis for each # - Cleanup the intermediate files created by the split process. # run_analysis() is defined above. -gateway_split_repertoire ${INFO_FILE} ${MANIFEST_FILE} ${ZIP_FILE} ${GATEWAY_ANALYSIS_DIR} "cell_file" ${SCRIPT_DIR}/${singularity_image} +gateway_split_repertoire ${INFO_FILE} ${MANIFEST_FILE} ${ZIP_FILE} ${GATEWAY_ANALYSIS_DIR} "cell_file" gateway_run_analysis ${INFO_FILE} ${MANIFEST_FILE} ${GATEWAY_ANALYSIS_DIR} "cell_file" gateway_cleanup ${ZIP_FILE} ${MANIFEST_FILE} ${GATEWAY_ANALYSIS_DIR} @@ -339,9 +293,6 @@ echo "IR-INFO: Done ZIPing analysis results - $(date)" echo "IR-INFO: Removing analysis output" #rm -rf ${GATEWAY_ANALYSIS_DIR} -# We don't want to copy around the singularity image everywhere. -#rm -f ${singularity_image} - # We don't want the iReceptor Utilities to be part of the results. #rm -rf ${GATEWAY_UTIL_DIR} From d07d3c103030a9d664a3999dc02d77d06bfe6278 Mon Sep 17 00:00:00 2001 From: Brian Corrie Date: Tue, 28 Feb 2023 09:33:32 -0800 Subject: [PATCH 010/221] Unzip error should be a fatal error, exit. --- public/gateway_utilities/gateway_utilities.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/gateway_utilities/gateway_utilities.sh b/public/gateway_utilities/gateway_utilities.sh index 94682b2a..2f2c597c 100644 --- a/public/gateway_utilities/gateway_utilities.sh +++ b/public/gateway_utilities/gateway_utilities.sh @@ -75,7 +75,7 @@ function gateway_unzip() { if [ $? 
-ne 0 ] then echo "GW-ERROR: Could not unzip ${ZIP_FILE} to ${WORKING_DIR}" - return + exit 1 fi echo "GW-INFO: Extracting files finished at: `date`" From ed8e4369270f1dfcb38ec907d8f57f0c9228cec4 Mon Sep 17 00:00:00 2001 From: Brian Corrie Date: Tue, 28 Feb 2023 11:34:09 -0800 Subject: [PATCH 011/221] Initial version of Conga App shell script --- .../agave_apps/cell-conga-singularity/app3.sh | 352 ++++++++++++++++++ 1 file changed, 352 insertions(+) create mode 100755 resources/agave_apps/cell-conga-singularity/app3.sh diff --git a/resources/agave_apps/cell-conga-singularity/app3.sh b/resources/agave_apps/cell-conga-singularity/app3.sh new file mode 100755 index 00000000..9e156e93 --- /dev/null +++ b/resources/agave_apps/cell-conga-singularity/app3.sh @@ -0,0 +1,352 @@ +# +# Wrapper script for running app through the iReceptor Gateway. +# + +# Get the script directory where all the code is. +SCRIPT_DIR=${_tapisExecSystemExecDir} +echo "IR-INFO: Running job from ${SCRIPT_DIR}" +echo "IR-INFO: PYTHONNOUSERSITE = ${PYTHONNOUSERSITE}" +ls / + +######################################################################## +# Tapis configuration/settings +######################################################################## + +# +# Tapis/AGAVE job variables. +# + +AGAVE_JOB_ID=${AGAVE_JOB_ID} +AGAVE_JOB_NAME=${AGAVE_JOB_NAME} +AGAVE_LOG_NAME=${AGAVE_JOB_NAME}-${AGAVE_JOB_ID} +AGAVE_JOB_PROCESSORS_PER_NODE=${AGAVE_JOB_PROCESSORS_PER_NODE} +AGAVE_JOB_MEMORY_PER_NODE=${AGAVE_JOB_MEMORY_PER_NODE} + +# +# Tapis App Parameters: Will be subsituted by Tapis. If they don't exist +# use command line arguments so we can test from the command line. +# + +# Tapis parameter ir_gateway_url contains the URL of the source gateway. Use +# this to gather iReceptor Gateway specific resources if needed. +GATEWAY_URL="${ir_gateway_url}" + +# +# Tapis App Inputs +# + +# Download file is a ZIP archive that is provided by the Gateway and contains +# the results of the users query. This is the data that is being analyzed. +if [ -z "${download_file}" ]; then + ZIP_FILE=$1 +else + ZIP_FILE=${download_file} +fi + +######################################################################## +# Done Tapis setup/processing. +######################################################################## +echo "IR-INFO: Using Gateway ${GATEWAY_URL}" + +# Report where we get the Gateway utilities from +GATEWAY_UTIL_DIR=${gateway_util_dir} +echo "IR-INFO: Using iReceptor Gateway Utilities from ${GATEWAY_UTIL_DIR}" + +# Load the iReceptor Gateway bash utility functions. +source ${GATEWAY_UTIL_DIR}/gateway_utilities.sh +if [ $? -ne 0 ] +then + echo "IR-ERROR: Could not load GATEWAY UTILIIES" + exit 1 +fi + +# This directory is defined in the gateway_utilities.sh. The Gateway +# relies on this being set. If it isn't set, abort as something has +# gone wrong with loading the Gateway utilties. +echo "IR-INFO: Gateway analysis directory = ${GATEWAY_ANALYSIS_DIR}" +if [ -z "${GATEWAY_ANALYSIS_DIR}" ]; then + echo "IR-ERROR: GATEWAY_ANALYSIS_DIR not defined, gateway_utilities not loaded correctly." 
+    exit 1
+fi
+echo "IR-INFO: Done loading iReceptor Gateway Utilities"
+
+# The Gateway provides information about the download in the file info.txt
+INFO_FILE="info.txt"
+MANIFEST_FILE="AIRR-manifest.json"
+
+# Start
+printf "IR-INFO: \nIR-INFO: \n"
+printf "IR-INFO: START at $(date)\n"
+printf "IR-INFO: PROCS = ${AGAVE_JOB_PROCESSORS_PER_NODE}\n"
+printf "IR-INFO: MEM = ${AGAVE_JOB_MEMORY_PER_NODE}\n"
+printf "IR-INFO: SLURM JOB ID = ${SLURM_JOB_ID}\n"
+printf "IR-INFO: "
+lscpu | grep "Model name"
+printf "IR-INFO: \nIR-INFO: \n"
+
+# This function is called by the iReceptor Gateway utilities function gateway_run_analysis.
+# The gateway utility function splits all data into repertoires and then calls this function
+# for a single repertoire. As such, this function should perform all analysis required for a
+# repertoire.
+function run_analysis()
+# Parameters:
+# $1 output directory
+# $2 repository name [string]
+# $3 repertoire_id ("NULL" if should skip repertoire processing)
+# $4 repertoire file (Not used if repertoire_id == NULL)
+# $5 manifest file
+# $6 analysis type
+{
+    # Use local variables - no scope issues please...
+    local output_directory=$1
+    local repository_name=$2
+    local repertoire_id=$3
+    local repertoire_file=$4
+    local manifest_file=$5
+    local analysis_type=$6
+    echo "IR-INFO: Running a Cell Repertoire Analysis on ${repertoire_id}"
+    echo "IR-INFO: Using manifest file ${manifest_file}"
+
+    # Get the list of cell files to process from the manifest.
+    local cell_files=( `python3 ${GATEWAY_UTIL_DIR}/manifest_summary.py ${manifest_file} "cell_file"` )
+    if [ $? -ne 0 ]
+    then
+        echo "IR-ERROR: Could not process manifest file ${manifest_file}"
+        echo "IR-ERROR: Processing for repertoire ${repertoire_id} not completed."
+        return
+    fi
+    if [ ${#cell_files[@]} != 1 ]
+    then
+        echo "IR-ERROR: Conga cell analysis only works with a single cell file."
+        echo "IR-ERROR: Processing for repertoire ${repertoire_id} not completed."
+        return
+    fi
+    local cell_file_count=${#cell_files[@]}
+    local cell_file=${cell_files[0]}
+    echo "IR-INFO: Using cell file ${cell_file}"
+    local gex_files=( `python3 ${GATEWAY_UTIL_DIR}/manifest_summary.py ${manifest_file} "expression_file"` )
+    if [ ${#gex_files[@]} != 1 ]
+    then
+        echo "IR-ERROR: Conga cell analysis only works with a single expression file."
+        echo "IR-ERROR: Processing for repertoire ${repertoire_id} not completed."
+        return
+    fi
+    local gex_file=${gex_files[0]}
+    echo "IR-INFO: Using gex file ${gex_file}"
+    local rearrangement_files=( `python3 ${GATEWAY_UTIL_DIR}/manifest_summary.py ${manifest_file} "rearrangement_file"` )
+    if [ ${#rearrangement_files[@]} != 1 ]
+    then
+        echo "IR-ERROR: Conga cell analysis only works with a single rearrangement file."
+        echo "IR-ERROR: Processing for repertoire ${repertoire_id} not completed."
+        return
+    fi
+    local rearrangement_file=${rearrangement_files[0]}
+    echo "IR-INFO: Using rearrangement file ${rearrangement_file}"
+
+    # Check to see if we are processing a specific repertoire_id
+    if [ "${repertoire_id}" != "NULL" ]; then
+        file_string=`python3 ${GATEWAY_UTIL_DIR}/repertoire_summary.py ${repertoire_file} ${repertoire_id} --separator "_"`
+        title_string="$(python3 ${GATEWAY_UTIL_DIR}/repertoire_summary.py ${repertoire_file} ${repertoire_id})"
+    else
+        file_string="total"
+        title_string="Total"
+    fi
+
+    # Clean up special characters in file and title strings.
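+    # For example (illustrative values): a repository name of "ipa1" and a
+    # repertoire summary of "PRJNA330606: 10x (TR)" come out of the sed
+    # below as "ipa1_PRJNA330606__10x__TR_".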
+    file_string=`echo ${repository_name}_${file_string} | sed "s/[!@#$%^&*() :/-]/_/g"`
+    # TODO: Fix this, it should not be required.
+    title_string=`echo ${title_string} | sed "s/[ ]//g"`
+
+    # Run the Conga pipeline on the cell, expression, and rearrangement files provided.
+    echo "IR-INFO: Running Conga on $cell_file"
+    echo "IR-INFO: Mapping ${PWD} to /data"
+    echo "IR-INFO: Asking for ${AGAVE_JOB_PROCESSORS_PER_NODE} threads"
+    echo "IR-INFO: Storing output in /data/${output_directory}"
+
+    # Convert the rearrangement file to a 10X contig file. This uses code in the container
+    # for this App that is specific to iReceptor.
+    CONTIG_PREFIX=10x-contig
+    python3 /gitrepos/conga/ireceptor/rearrangements-to-10x.py ${output_directory}/${rearrangement_file} ${output_directory}/${CONTIG_PREFIX}.csv
+    if [ $? -ne 0 ]
+    then
+        echo "IR-ERROR: Could not generate 10X contigs from the rearrangements"
+        echo "IR-ERROR: Processing for repertoire ${repertoire_id} (${title_string}) not completed."
+        return
+    fi
+
+    # Get the field that links cell data to rearrangement data.
+    repertoire_link_field=`python3 ${GATEWAY_UTIL_DIR}/repertoire_field.py --json_filename ${repertoire_file} --repertoire_field data_processing.data_processing_id --repertoire_id ${repertoire_id}`
+
+    # Get the first N cell ids from the cell data file. We use the adc_annotation_cell_id
+    # field to check against the rearrangement cell_id.
+    cells_to_check=20
+    cell_ids=`python3 ${GATEWAY_UTIL_DIR}/preprocess-json.py ${output_directory}/${cell_file} Cell adc_annotation_cell_id | head -${cells_to_check} | awk '{if (NR>1) printf("|%s", $1); else printf("%s", $1)}'`
+
+    # Get the column number of the v_call field in the rearrangement file.
+    column_header='v_call'
+    column_number=`cat ${output_directory}/${rearrangement_file} | head -n 1 | awk -F"\t" -v label=${column_header} '{for(i=1;i<=NF;i++){if ($i == label){print i}}}'`
+
+    # Check the first N cells in the rearrangement file and extract the list
+    # of cell types (loci) in the data (IG or TR). Note: awk's substr() is
+    # 1-indexed, so we take the two characters starting at position 1.
+    repertoire_locus=( `egrep "${cell_ids}" ${output_directory}/${rearrangement_file} | cut -f ${column_number} | tail --lines=+2 | awk '{printf("%s\n", substr($1,1,2))}' | sort -u | awk '{printf("%s ",$0)}'` )
+    if [ $? -ne 0 ]
+    then
+        echo "IR-ERROR: Could not get a cell type for repertoire ${repertoire_id}"
+        echo "IR-ERROR: Processing for repertoire ${repertoire_id} (${title_string}) not completed."
+        return
+    fi
+
+    # Check to see if there is only one cell type in the data.
+    if [ ${#repertoire_locus[@]} != 1 ]
+    then
+        echo "IR-ERROR: Conga cell analysis requires a single cell type (repertoire_id = ${repertoire_id}, cell types = ${repertoire_locus[@]})."
+        echo "IR-ERROR: Processing for repertoire ${repertoire_id} (${title_string}) not completed."
+        return
+    fi
+
+    # If there is only one, check that it is the TR cell type; if so we are good,
+    # if not it is an error.
+    repertoire_locus=${repertoire_locus[0]}
+
+    if [ "${repertoire_locus}" == "TR" ]
+    then
+        conga_type="human"
+    # Code to add when Conga's IG processing gets better.
+    #elif [ "${repertoire_locus}" == "IG" ]
+    #then
+    #    conga_type="human_ig"
+    else
+        echo "IR-ERROR: Conga cell analysis can only run on TR repertoires (repertoire_id = ${repertoire_id}, cell type = ${repertoire_locus})."
+        echo "IR-ERROR: Processing for repertoire ${repertoire_id} (${title_string}) not completed."
+ return + fi + echo "IR-INFO: Column header = ${column_header}" + echo "IR-INFO: Column number = ${column_number}" + echo "IR-INFO: Locus = ${repertoire_locus[@]}" + echo "IR-INFO: Data Processing ID = ${repertoire_link_field}" + echo "IR-INFO: Conga analysis type = ${conga_type}" + + # Run Conga setup for processing. + #singularity exec --cleanenv --env PYTHONNOUSERSITE=1 -B ${PWD}:/data ${SCRIPT_DIR}/${singularity_image} python3 /gitrepos/conga/scripts/setup_10x_for_conga.py --filtered_contig_annotations_csvfile /data/${output_directory}/${CONTIG_PREFIX}.csv --organism ${conga_type} + python3 /gitrepos/conga/scripts/setup_10x_for_conga.py --filtered_contig_annotations_csvfile ${PWD}/${output_directory}/${CONTIG_PREFIX}.csv --organism ${conga_type} + if [ $? -ne 0 ] + then + echo "IR-ERROR: Conga setup_10x_for_conga failed on ${output_directory}/${CONTIG_PREFIX}.csv" + echo "IR-ERROR: Processing for repertoire ${repertoire_id} (${title_string}) not completed." + return + fi + + # Run Conga proper on the data. + #singularity exec --cleanenv --env PYTHONNOUSERSITE=1 -B ${PWD}:/data ${SCRIPT_DIR}/${singularity_image} python3 /gitrepos/conga/scripts/run_conga.py --all --organism ${conga_type} --clones_file /data/${output_directory}/${CONTIG_PREFIX}_tcrdist_clones.tsv --gex_data /data/${output_directory}/${gex_file} --gex_data_type h5ad --outfile_prefix /data/${output_directory}/${file_string} + python3 /gitrepos/conga/scripts/run_conga.py --all --organism ${conga_type} --clones_file ${PWD}/${output_directory}/${CONTIG_PREFIX}_tcrdist_clones.tsv --gex_data ${PWD}/${output_directory}/${gex_file} --gex_data_type h5ad --outfile_prefix ${PWD}/${output_directory}/${file_string} + if [ $? -ne 0 ] + then + echo "IR-ERROR: Conga failed on ${CONTIG_PREFIX}_tcrdist_clones.tsv and ${gex_file}" + echo "IR-ERROR: Processing for repertoire ${repertoire_id} not completed." + return + fi + + # Generate a summary HTML file for the Gateway to present this info to the user + html_file=${output_directory}/${repertoire_id}.html + + # Generate the HTML main block + printf '\n' > ${html_file} + printf '' >> ${html_file} + + # Generate a normal looking iReceptor header + printf '\n' >> ${html_file} + cat ${output_directory}/assets/head-template.html >> ${html_file} + printf "Conga: %s\n" ${title_string} >> ${html_file} + printf '\n' >> ${html_file} + + # Generate an iReceptor top bar for the page + cat ${output_directory}/assets/top-bar-template.html >> ${html_file} + + # Generate a normal looking iReceptor header + printf '
<body><div class="container">' >> ${html_file}
+
+    printf "<h2>Conga: %s</h2>\n" ${title_string} >> ${html_file}
+
+    printf "<h2>Analysis</h2>\n" >> ${html_file}
+
+    printf "<h3>Conga Analysis: %s</h3>\n" ${title_string} >> ${html_file}
+    sed -i 's/gex_clusters_tcrdist_trees.png/gex_clusters_tcrdist_trees.svg/g' ${output_directory}/${file_string}_results_summary.html
+    sed -i 's/conga_threshold_tcrdist_tree.png/conga_threshold_tcrdist_tree.svg/g' ${output_directory}/${file_string}_results_summary.html
+    printf '<iframe src="%s" width="100%%" height="1000px"></iframe>\n' ${file_string}_results_summary.html >> ${html_file}
+
+    # End of main div container
+    printf '</div>
' >> ${html_file} + + # Use the normal iReceptor footer. + cat ${output_directory}/assets/footer.html >> ${html_file} + + # Generate end body end HTML + printf '' >> ${html_file} + printf '' >> ${html_file} + + + # Copy the Conga summary report to the gateway expected summary for this repertoire + #cp ${output_directory}/${file_string}_results_summary.html ${output_directory}/${repertoire_id}.html + # Add the required label file for the Gateway to present the results as a summary. + label_file=${output_directory}/${repertoire_id}.txt + echo "${title_string}" > ${label_file} + + # Remove the intermediate files generated for Conga + rm -f ${output_directory}/${CONTIG_PREFIX}.csv ${output_directory}/${CONTIG_PREFIX}_* + rm -f ${output_directory}/features.tsv.gz ${output_directory}/barcodes.tsv.gz ${output_directory}/matrix.mtx.gz ${output_directory}/matrix.mtx.tmp + + # We don't want to keep around the generated data files or the manifest file. + rm -f ${output_directory}/${cell_file} ${output_directory}/${gex_file} ${output_directory}/${rearrangement_file} ${output_directory}/${manifest_file} + + # done + printf "IR-INFO: Done running Repertoire Analysis on ${cell_file} at $(date)\n\n" +} + +# Split the data by repertoire. This creates a directory tree in $GATEWAY_ANALYSIS_DIR +# with a directory per repository and within that a directory per repertoire in +# that repository. In each repertoire directory there will exist an AIRR manifest +# file and the data (as described in the manifest file) from that repertoire. +# +# The gateway utilities use a callback mechanism, calling the +# function run_analysis() on each repertoire. The run_analysis function +# is locally provided and should do all of the processing for a single +# repertoire. +# +# So the pipeline is: +# - Split the data into repertoire directories as described above +# - Run the analysis on each repertoire, calling run_analysis for each +# - Cleanup the intermediate files created by the split process. +# run_analysis() is defined above. +gateway_split_repertoire ${INFO_FILE} ${MANIFEST_FILE} ${ZIP_FILE} ${GATEWAY_ANALYSIS_DIR} "cell_file" +gateway_run_analysis ${INFO_FILE} ${MANIFEST_FILE} ${GATEWAY_ANALYSIS_DIR} "cell_file" +gateway_cleanup ${ZIP_FILE} ${MANIFEST_FILE} ${GATEWAY_ANALYSIS_DIR} + +# Make sure we are back where we started, although the gateway functions should +# not change the working directory that we are in. +cd ${SCRIPT_DIR} + +# We want to move the info.txt file to the main directory. +cp ${GATEWAY_ANALYSIS_DIR}/${INFO_FILE} . + +# We want to keep the job error and output files as part of the analysis output. +cp *.err ${GATEWAY_ANALYSIS_DIR} +cp *.out ${GATEWAY_ANALYSIS_DIR} + +# Zip up the analysis results for easy download +echo "IR-INFO: ZIPing analysis results - $(date)" +zip -r ${GATEWAY_ANALYSIS_DIR}.zip ${GATEWAY_ANALYSIS_DIR} +echo "IR-INFO: Done ZIPing analysis results - $(date)" + +# We don't want the analysis files to remain - they are in the ZIP file +echo "IR-INFO: Removing analysis output" +#rm -rf ${GATEWAY_ANALYSIS_DIR} + +# We don't want the iReceptor Utilities to be part of the results. 
+#rm -rf ${GATEWAY_UTIL_DIR}
+
+# Cleanup the input data files, don't want to return them as part of the resulting analysis
+echo "IR-INFO: Removing original ZIP file $ZIP_FILE"
+#rm -f $ZIP_FILE
+
+# End
+printf "IR-INFO: DONE at $(date)\n\n"
+

From a4880b020eea3898424e087c659e821a4e107c54 Mon Sep 17 00:00:00 2001
From: bcorrie
Date: Sat, 4 Mar 2023 02:17:50 +0000
Subject: [PATCH 012/221] Cleaned up a few old bits of code

---
 resources/agave_apps/cell-celltypist-singularity/app3.sh | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/resources/agave_apps/cell-celltypist-singularity/app3.sh b/resources/agave_apps/cell-celltypist-singularity/app3.sh
index 5a492732..db3c8d7d 100755
--- a/resources/agave_apps/cell-celltypist-singularity/app3.sh
+++ b/resources/agave_apps/cell-celltypist-singularity/app3.sh
@@ -3,7 +3,6 @@
 #
 
 # Get the script directory where all the code is.
-SCRIPT_DIR=`pwd`
 SCRIPT_DIR=${_tapisExecSystemExecDir}
 echo "IR-INFO: Running job from ${SCRIPT_DIR}"
 ls /
@@ -43,9 +42,6 @@ else
     ZIP_FILE=${download_file}
 fi
 
-# If you want to tell Tapis that the job failed
-export JOB_ERROR=1
-
 ########################################################################
 # Done Tapis setup/processing.
 ########################################################################

From f9117ac73b4ce8ef7b28aa36a887d7897f4a2e71 Mon Sep 17 00:00:00 2001
From: Brian Corrie
Date: Fri, 3 Mar 2023 18:31:18 -0800
Subject: [PATCH 013/221] Initial version for Tapis3

---
 .../agave_apps/immunarch-singularity/app3.sh | 310 ++++++++++++++++++
 1 file changed, 310 insertions(+)
 create mode 100755 resources/agave_apps/immunarch-singularity/app3.sh

diff --git a/resources/agave_apps/immunarch-singularity/app3.sh b/resources/agave_apps/immunarch-singularity/app3.sh
new file mode 100755
index 00000000..c76a346f
--- /dev/null
+++ b/resources/agave_apps/immunarch-singularity/app3.sh
@@ -0,0 +1,310 @@
+#
+# Wrapper script for running app through the iReceptor Gateway.
+#
+
+# Get the script directory where all the code is.
+SCRIPT_DIR=${_tapisExecSystemExecDir}
+echo "IR-INFO: Running job from ${SCRIPT_DIR}"
+ls /
+
+########################################################################
+# Tapis configuration/settings
+########################################################################
+
+#
+# Tapis/AGAVE job variables.
+#
+
+AGAVE_JOB_ID=${AGAVE_JOB_ID}
+AGAVE_JOB_NAME=${AGAVE_JOB_NAME}
+AGAVE_LOG_NAME=${AGAVE_JOB_NAME}-${AGAVE_JOB_ID}
+AGAVE_JOB_PROCESSORS_PER_NODE=${AGAVE_JOB_PROCESSORS_PER_NODE}
+AGAVE_JOB_MEMORY_PER_NODE=${AGAVE_JOB_MEMORY_PER_NODE}
+
+#
+# Tapis App Parameters: Will be substituted by Tapis. If they don't exist
+# use command line arguments so we can test from the command line.
+#
+
+# Tapis parameter ir_gateway_url contains the URL of the source gateway. Use
+# this to gather iReceptor Gateway specific resources if needed.
+GATEWAY_URL="${ir_gateway_url}"
+
+#
+# Tapis App Inputs
+#
+
+# Download file is a ZIP archive that is provided by the Gateway and contains
+# the results of the user's query. This is the data that is being analyzed.
+if [ -z "${download_file}" ]; then
+    ZIP_FILE=$1
+else
+    ZIP_FILE=${download_file}
+fi
+
+# Many of our Tapis Apps have a split_repertoire variable, so to keep things
+# consistent we define it here if it isn't provided by the App.
+# Immunarch by default splits repertoires.
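+# (As a sketch of the defaulting pattern used below, with a hypothetical
+# variable name, bash parameter expansion would be an equivalent shorthand:
+#     my_param="${my_param:-True}"
+# The explicit -z test below does the same thing for split_repertoire.)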
+if [ -z "${split_repertoire}" ]; then + split_repertoire="True" +fi + +######################################################################## +# Done Tapis setup/processing. +######################################################################## + +echo "IR-INFO: Using Gateway ${GATEWAY_URL}" + +# Report where we get the Gateway utilities from +GATEWAY_UTIL_DIR=${gateway_util_dir} +echo "IR-INFO: Using iReceptor Gateway Utilities from ${GATEWAY_UTIL_DIR}" + +# Load the iReceptor Gateway bash utility functions. +source ${GATEWAY_UTIL_DIR}/gateway_utilities.sh +if [ $? -ne 0 ] +then + echo "IR-ERROR: Could not load GATEWAY UTILIIES" + exit 1 +fi + +# This directory is defined in the gateway_utilities.sh. The Gateway +# relies on this being set. If it isn't set, abort as something has +# gone wrong with loading the Gateway utilties. +echo "IR-INFO: Gateway analysis directory = ${GATEWAY_ANALYSIS_DIR}" +if [ -z "${GATEWAY_ANALYSIS_DIR}" ]; then + echo "IR-ERROR: GATEWAY_ANALYSIS_DIR not defined, gateway_utilities not loaded correctly." + exit 1 +fi +echo "IR-INFO: Done loading iReceptor Gateway Utilities" + +# The Gateway provides information about the download in the file info.txt +INFO_FILE="info.txt" +MANIFEST_FILE="AIRR-manifest.json" + +# Start +printf "IR-INFO:\n" +printf "IR-INFO: START at $(date)\n" +printf "IR-INFO: PROCS = ${AGAVE_JOB_PROCESSORS_PER_NODE}\n" +printf "IR-INFO: MEM = ${AGAVE_JOB_MEMORY_PER_NODE}\n" +printf "IR-INFO: SLURM JOB ID = ${SLURM_JOB_ID}\n" +printf "IR-INFO: \n" + + +# This function is called by the iReceptor Gateway utilities function gateway_split_repertoire +# The gateway utility function splits all data into repertoires and then calls this function +# for a single repertoire. As such, this function should perform all analysis required for a +# repertoire. +function run_analysis() +# Parameters: +# $1 output directory +# $2 repository name [string] +# $3 repertoire_id ("NULL" if should skip repertoire processing) +# $4 repertoire file (Not used if repertoire_id == NULL) +# $5 manifest file +{ + # Use local variables - no scope issues please... + local output_directory=$1 + local repository_name=$2 + local repertoire_id=$3 + local repertoire_file=$4 + local manifest_file=$5 + echo "IR-INFO: Running a Repertoire Analysis with manifest ${manifest_file}" + echo "IR-INFO: Working directory = ${output_directory}" + echo "IR-INFO: Repository name = ${repository_name}" + echo "IR-INFO: Repertoire id = ${repertoire_id}" + echo "IR-INFO: Repertoire file = ${repertoire_file}" + echo "IR-INFO: Manifest file = ${manifest_file}" + echo -n "IR-INFO: Current diretory = " + pwd + + # Get a list of rearrangement files to process from the manifest. + local array_of_files=( `python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/manifest_summary.py ${manifest_file} "rearrangement_file"` ) + if [ $? -ne 0 ] + then + echo "IR-ERROR: Could not process manifest file ${manifest_file}" + return + fi + echo "IR-INFO: Using files ${array_of_files[@]}" + + + # Check to see if we are processing a specific repertoire_id + if [ "${repertoire_id}" != "Total" ]; then + # Set the R program if we are doing a repertoire by repertoire analysis. + r_program='immunarch.R' + title_string="$(python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/repertoire_summary.py ${repertoire_file} ${repertoire_id})" + else + # Set the R program if we are doing a comparative analysis. + r_program='immunarch_group.R' + title_string="Total" + fi + + # TODO: Fix this, it should not be required. 
+ title_string=`echo ${title_string} | sed "s/[ ]//g"` + + # Create an Immunarch data directory. Immunarch likes to run on all + # files in the directory, so we don't want any extraneous files in + # the directory where Immunarch is running. + mkdir ${PWD}/${output_directory}/data + for filename in "${array_of_files[@]}"; do + echo "IR-INFO: Running ImmunArch on $filename" + echo "IR-INFO: Asking for ${AGAVE_JOB_PROCESSORS_PER_NODE} threads" + echo "IR-INFO: Mapping ${PWD} to /data" + echo "IR-INFO: Input data = /data/${output_directory}/data" + echo "IR-INFO: Storing output in /data/${output_directory}" + + # Immunarch is very permissive, it tries to process everything in the directory. + # We want it only to process the data files, so we create a temporary directory + # for this so immunarch doesn't try and do other weird things like analyze images + mv ${PWD}/${output_directory}/${filename} ${PWD}/${output_directory}/data/${filename} + + # Run ImmunArch + #singularity exec -e -B ${PWD}:/data -B ${SCRIPT_DIR}:/localsrc ${SCRIPT_DIR}/${singularity_image} Rscript /localsrc/${r_program} /data/${output_directory}/data /data/${output_directory} + Rscript ${SCRIPT_DIR}/${r_program} ${PWD}/${output_directory}/data ${PWD}/${output_directory} + if [ $? -ne 0 ] + then + echo "IR-ERROR: Immunarch failed on file ${output_directory}" + return + fi + + # Remove the repertoire TSV file, we don't want to keep it around as part of the analysis results. + rm -f ${PWD}/${output_directory}/data/${filename} + + # Remove the generated manifest file. + rm -f ${manifest_file} + + # Generate a label file for the Gateway to use to present this info to the user + label_file=${output_directory}/${repertoire_id}.txt + echo "${title_string}" > ${label_file} + + # Generate a summary output report for the analysis for the + # gateway to use as a summary. + html_file=${output_directory}/${repertoire_id}.html + + # Generate the HTML main block + printf '\n' > ${html_file} + printf '' >> ${html_file} + + # Generate a normal looking iReceptor header + printf '\n' >> ${html_file} + cat ${output_directory}/assets/head-template.html >> ${html_file} + printf "Immunarch: %s\n" ${title_string} >> ${html_file} + printf '\n' >> ${html_file} + + # Generate an iReceptor top bar for the page + cat ${output_directory}/assets/top-bar-template.html >> ${html_file} + + # Generate a normal looking iReceptor header + printf '
<body><div class="container">' >> ${html_file}
+
+    # Generate the output from the analysis.
+    printf "<h2>Immunarch: %s</h2>\n" ${title_string} >> ${html_file}
+    printf "<h2>Analysis</h2>\n" >> ${html_file}
+    printf "<h3>Top Clones</h3>\n" >> ${html_file}
+    printf '<iframe src="%s" width="100%%" height="500px"></iframe>\n' top_10_clones.html >> ${html_file}
+    # The below would be more elegant but it is HTML5 and doesn't work
+    #printf "<h3>Top Clones 2</h3>\n" >> ${html_file}
+    #printf '<object data="%s"></object>\n' top_10_clones.html >> ${html_file}
+    printf '<img src="%s"/>\n' clonal_homeo.png >> ${html_file}
+    printf '<img src="%s"/>\n' clonal_rare.png >> ${html_file}
+    printf '<img src="%s"/>\n' count.png >> ${html_file}
+    printf '<img src="%s"/>\n' gene_family_usage_normalized.png >> ${html_file}
+    printf '<img src="%s"/>\n' gene_usage_normalized.png >> ${html_file}
+    printf '<img src="%s"/>\n' len.png >> ${html_file}
+
+    # End of main div container
+    printf '</div>
' >> ${html_file} + + # Use the normal iReceptor footer. + cat ${output_directory}/assets/footer.html >> ${html_file} + + # Generate end body end HTML + printf '' >> ${html_file} + printf '' >> ${html_file} + + done + # Remove the Immunarch required data directory + rmdir ${PWD}/${output_directory}/data + printf "IR-INFO: Done Repertoire Analysis on ${array_of_files[@]} at $(date)\n\n" +} + +if [ "${split_repertoire}" = "True" ]; then + echo -e "IR-INFO:\nIR-INFO: Splitting data by Repertoire" + echo "IR-INFO:" + # Split the data by repertoire. This creates a directory tree in $GATEWAY_ANALYSIS_DIR + # with a directory per repository and within that a directory per repertoire in + # that repository. In each repertoire directory there will exist an AIRR TSV + # file with the data from that repertoire. + # + # This gateway utility function uses a callback mechanism, calling the + # function run_analysis() on each repertoire. The run_analysis function takes + # as paramenters the TSV files to process, the directory for the repertoire in + # which to store the analysis results, the a string repersenting the repository + # from which the data came, the repertoire_id, and a repertoire JSON file in which + # information about the repertoire can be found. + # + # run_analysis() is defined above. + gateway_split_repertoire ${INFO_FILE} ${MANIFEST_FILE} ${ZIP_FILE} ${GATEWAY_ANALYSIS_DIR} + gateway_run_analysis ${INFO_FILE} ${MANIFEST_FILE} ${GATEWAY_ANALYSIS_DIR} + gateway_cleanup ${ZIP_FILE} ${MANIFEST_FILE} ${GATEWAY_ANALYSIS_DIR} + +elif [ "${split_repertoire}" = "False" ]; then + echo -e "IR-INFO:\nIR-INFO: Running app on entire data set" + echo "IR-INFO:" + + # Output directory is called "Total" + # Run the analysis with a token repository name of "ADC" since the + # analysis is being run on data from the entire ADC. + # repertoire_id and repository should be "NULL" + # Lastly, provide the list of TSV files to process. Remember that + # the array elements are expanded into separate parameters, which + # the run_analyis function handles. + outdir="Total" + + # Run the stats on all the data combined. Unzip the files + gateway_unzip ${ZIP_FILE} ${GATEWAY_ANALYSIS_DIR}/${outdir} + + # Run the stats analysis. + run_analysis ${GATEWAY_ANALYSIS_DIR}/${outdir} "AIRRDataCommons" ${outdir} "NULL" ${GATEWAY_ANALYSIS_DIR}/${outdir}/${MANIFEST_FILE} + +else + echo "IR-ERROR: Unknown repertoire operation ${split_repertoire}" >&2 + exit 1 +fi + + +#gateway_split_repertoire ${INFO_FILE} ${MANIFEST_FILE} ${ZIP_FILE} ${GATEWAY_ANALYSIS_DIR} + +# Make sure we are back where we started, although the gateway functions should +# not change the working directory that we are in. +cd ${SCRIPT_DIR} + +# We want to move the info.txt to the main directory. The Gateway expects this. +cp ${GATEWAY_ANALYSIS_DIR}/${INFO_FILE} . + +# We want the job error and output files to be part of the analysis so copy them +cp *.err ${GATEWAY_ANALYSIS_DIR} +cp *.out ${GATEWAY_ANALYSIS_DIR} + +# Zip up the analysis results for easy download +echo "IR-INFO: ZIPing analysis results" +zip -r ${GATEWAY_ANALYSIS_DIR}.zip ${GATEWAY_ANALYSIS_DIR} + +# We don't want the analysis files to remain - they are in the ZIP file +echo "IR-INFO: Removing analysis output" +#rm -rf ${GATEWAY_ANALYSIS_DIR} + +# We don't want the iReceptor Utilities to be part of the results. 
+#rm -rf ${GATEWAY_UTIL_DIR} + +# Cleanup the input data files, don't want to return them as part of the resulting analysis +echo "IR-INFO: Removing original ZIP file $ZIP_FILE" +#rm -f $ZIP_FILE + +# End +printf "IR-INFO: DONE at $(date)\n\n" + From 26e6d81f8ab55778a4ab26b327e10567938294b8 Mon Sep 17 00:00:00 2001 From: Brian Corrie Date: Wed, 8 Mar 2023 09:28:28 -0800 Subject: [PATCH 014/221] Working version of Tapis 3 shell script --- resources/agave_apps/immunarch-singularity/app3.sh | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/resources/agave_apps/immunarch-singularity/app3.sh b/resources/agave_apps/immunarch-singularity/app3.sh index c76a346f..eec34320 100755 --- a/resources/agave_apps/immunarch-singularity/app3.sh +++ b/resources/agave_apps/immunarch-singularity/app3.sh @@ -123,7 +123,7 @@ function run_analysis() pwd # Get a list of rearrangement files to process from the manifest. - local array_of_files=( `python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/manifest_summary.py ${manifest_file} "rearrangement_file"` ) + local array_of_files=( `python3 ${GATEWAY_UTIL_DIR}/manifest_summary.py ${manifest_file} "rearrangement_file"` ) if [ $? -ne 0 ] then echo "IR-ERROR: Could not process manifest file ${manifest_file}" @@ -131,12 +131,11 @@ function run_analysis() fi echo "IR-INFO: Using files ${array_of_files[@]}" - # Check to see if we are processing a specific repertoire_id if [ "${repertoire_id}" != "Total" ]; then # Set the R program if we are doing a repertoire by repertoire analysis. r_program='immunarch.R' - title_string="$(python3 ${SCRIPT_DIR}/${GATEWAY_UTIL_DIR}/repertoire_summary.py ${repertoire_file} ${repertoire_id})" + title_string="$(python3 ${GATEWAY_UTIL_DIR}/repertoire_summary.py ${repertoire_file} ${repertoire_id})" else # Set the R program if we are doing a comparative analysis. r_program='immunarch_group.R' @@ -152,10 +151,10 @@ function run_analysis() mkdir ${PWD}/${output_directory}/data for filename in "${array_of_files[@]}"; do echo "IR-INFO: Running ImmunArch on $filename" - echo "IR-INFO: Asking for ${AGAVE_JOB_PROCESSORS_PER_NODE} threads" - echo "IR-INFO: Mapping ${PWD} to /data" + echo "IR-INFO: Asking for ${AGAVE_JOB_PROCESSORS_PER_NODE} threads" + echo "IR-INFO: Mapping ${PWD} to /data" echo "IR-INFO: Input data = /data/${output_directory}/data" - echo "IR-INFO: Storing output in /data/${output_directory}" + echo "IR-INFO: Storing output in /data/${output_directory}" # Immunarch is very permissive, it tries to process everything in the directory. # We want it only to process the data files, so we create a temporary directory @@ -163,8 +162,7 @@ function run_analysis() mv ${PWD}/${output_directory}/${filename} ${PWD}/${output_directory}/data/${filename} # Run ImmunArch - #singularity exec -e -B ${PWD}:/data -B ${SCRIPT_DIR}:/localsrc ${SCRIPT_DIR}/${singularity_image} Rscript /localsrc/${r_program} /data/${output_directory}/data /data/${output_directory} - Rscript ${SCRIPT_DIR}/${r_program} ${PWD}/${output_directory}/data ${PWD}/${output_directory} + Rscript /opt/ireceptor/${r_program} ${PWD}/${output_directory}/data ${PWD}/${output_directory} if [ $? -ne 0 ] then echo "IR-ERROR: Immunarch failed on file ${output_directory}" From d97b5c9e4b226a67c4896d78c60b13e8ed3aa760 Mon Sep 17 00:00:00 2001 From: Brian Corrie Date: Thu, 9 Mar 2023 16:06:38 -0800 Subject: [PATCH 015/221] Added a few more info statements. 
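The logging below surrounds the awk-based repertoire split. For
reference, that splitting technique looks roughly like this (a sketch
with illustrative file and column names, not the utility's exact code):

    # Split data.tsv into one TSV per value of column 2 (repertoire_id),
    # carrying the header line into every output file.
    awk -F'\t' 'NR==1 {hdr=$0; next}
                {out="tmp/" $2 ".tsv"
                 if (!(out in seen)) {print hdr > out; seen[out]=1}
                 print >> out}' data.tsv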
---
 public/gateway_utilities/gateway_utilities.sh | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/public/gateway_utilities/gateway_utilities.sh b/public/gateway_utilities/gateway_utilities.sh
index 2f2c597c..15177990 100644
--- a/public/gateway_utilities/gateway_utilities.sh
+++ b/public/gateway_utilities/gateway_utilities.sh
@@ -651,6 +651,7 @@ function gateway_split_repertoire(){
         echo "GW-ERROR: Could not find ${SPLIT_FIELD} column in ${data_file}"
         continue
     fi
+    echo "GW-INFO: Using column ${repertoire_id_column} for ${SPLIT_FIELD}"
 
     # Split the file into N files based on SPLIT_FIELD.
     # AWK is pretty efficient at this
@@ -660,11 +661,14 @@ function gateway_split_repertoire(){
         echo "GW-ERROR: Could not split ${data_file} on field ${SPLIT_FIELD}"
         continue
     fi
+    echo "GW-INFO: Files generated by split:"
+    wc -l ${TMP_DIR}/*
 
     # Move the file from its temp location to its final location.
     for repertoire_id in "${repertoire_ids[@]}"; do
         echo "GW-INFO: Moving ${TMP_DIR}/${repertoire_id}.tsv to ${repository_name}/${repertoire_id}/"
-        mv ${TMP_DIR}/${repertoire_id}.tsv ${repository_name}/${repertoire_id}/
+        #mv ${TMP_DIR}/${repertoire_id}.tsv ${repository_name}/${repertoire_id}/
+        cp ${TMP_DIR}/${repertoire_id}.tsv ${repository_name}/${repertoire_id}/
     done
 
     # Remove the temporary files/directories that remain.

From 3d7c053b30663ed2717084df6e448252e26da8a4 Mon Sep 17 00:00:00 2001
From: Brian Corrie
Date: Thu, 9 Mar 2023 16:07:33 -0800
Subject: [PATCH 016/221] Initial working version of Histogram App

---
 resources/agave_apps/histogram/app3.sh | 355 +++++++++++++++++
 1 file changed, 355 insertions(+)
 create mode 100755 resources/agave_apps/histogram/app3.sh

diff --git a/resources/agave_apps/histogram/app3.sh b/resources/agave_apps/histogram/app3.sh
new file mode 100755
index 00000000..05d3c3fd
--- /dev/null
+++ b/resources/agave_apps/histogram/app3.sh
@@ -0,0 +1,355 @@
+#
+# Wrapper script for running app through the iReceptor Gateway.
+#
+
+echo "IR-INFO: iReceptor Histogram App"
+
+# Get the script directory where all the code is.
+SCRIPT_DIR=${_tapisExecSystemExecDir}
+echo "IR-INFO: Running job from ${SCRIPT_DIR}"
+ls /
+
+########################################################################
+# Tapis configuration/settings
+########################################################################
+
+#
+# Tapis/AGAVE job variables.
+#
+AGAVE_JOB_ID=${AGAVE_JOB_ID}
+AGAVE_JOB_NAME=${AGAVE_JOB_NAME}
+AGAVE_LOG_NAME=${AGAVE_JOB_NAME}-${AGAVE_JOB_ID}
+AGAVE_JOB_PROCESSORS_PER_NODE=${AGAVE_JOB_PROCESSORS_PER_NODE}
+AGAVE_JOB_MEMORY_PER_NODE=${AGAVE_JOB_MEMORY_PER_NODE}
+
+#
+# Tapis ENV variables expected
+#
+ZIP_FILE=${download_file}
+# Tapis parameter ir_gateway_url contains the URL of the source gateway. Use
+# this to gather iReceptor Gateway specific resources if needed.
+GATEWAY_URL="${ir_gateway_url}"
+
+#
+# Tapis App Parameters: Will be substituted by Tapis on the command line to
+# the singularity command that is executed.
+#
+SPLIT_REPERTOIRE=$1
+VARNAME=$2
+NUM_VALUES=$3
+SORT_VALUES=$4
+
+##############################################
+# Set up Gateway Utilities
+##############################################
+echo "IR-INFO: Using Gateway ${GATEWAY_URL}"
+
+# Report where we get the Gateway utilities from
+GATEWAY_UTIL_DIR=${gateway_util_dir}
+echo "IR-INFO: Using iReceptor Gateway Utilities from ${GATEWAY_UTIL_DIR}"
+
+# Load the iReceptor Gateway utilities functions.
+source ${GATEWAY_UTIL_DIR}/gateway_utilities.sh
+if [ $?
-ne 0 ]
+then
+    echo "IR-ERROR: Could not load GATEWAY UTILITIES"
+    exit 1
+fi
+
+# This directory is defined in the gateway_utilities.sh. The Gateway
+# relies on this being set. If it isn't set, abort as something has
+# gone wrong with loading the Gateway utilities.
+echo "IR-INFO: Gateway analysis directory = ${GATEWAY_ANALYSIS_DIR}"
+if [ -z "${GATEWAY_ANALYSIS_DIR}" ]; then
+    echo "IR-ERROR: GATEWAY_ANALYSIS_DIR not defined, gateway_utilities not loaded correctly." >&2
+    exit 1
+fi
+echo "IR-INFO: Done loading iReceptor Gateway Utilities"
+
+#########################################################################
+# Application variables (will be substituted by Tapis). If they don't exist
+# use command line arguments.
+#########################################################################
+
+# Start
+printf "IR-INFO: \nIR-INFO: \n"
+printf "IR-INFO: START at $(date)\n"
+printf "IR-INFO: PROCS = ${AGAVE_JOB_PROCESSORS_PER_NODE}\n"
+printf "IR-INFO: MEM = ${AGAVE_JOB_MEMORY_PER_NODE}\n"
+printf "IR-INFO: SLURM JOB ID = ${SLURM_JOB_ID}\n"
+printf "IR-INFO: ZIP FILE = ${ZIP_FILE}\n"
+printf "IR-INFO: SPLIT_REPERTOIRE = ${SPLIT_REPERTOIRE}\n"
+printf "IR-INFO: VARNAME = ${VARNAME}\n"
+printf "IR-INFO: NUM_VALUES = ${NUM_VALUES}\n"
+printf "IR-INFO: SORT_VALUES = ${SORT_VALUES}\n"
+
+printf "IR-INFO: \nIR-INFO: \n"
+
+echo "IR-INFO: Running histogram on variable ${VARNAME}"
+
+#########################################################################
+# Code to do the analysis
+#########################################################################
+
+function do_histogram()
+# Parameters:
+# $1 is variable_name to process
+# $2 output directory
+# $3 name of processing object (use to tag file)
+# $4 title of processing object (use in title of graph)
+# $5-$N remaining arguments are files to process.
+{
+    # Get the local variables to use
+    local variable_name=$1
+    local output_dir=$2
+    local file_tag=$3
+    local title=$4
+    shift
+    shift
+    shift
+    shift
+    # Remaining variables are the files to process
+    echo "IR-INFO: File tag = $file_tag"
+    echo "IR-INFO: title = $title"
+    echo "IR-INFO: Using files $@"
+    local array_of_files=( $@ )
+    echo "IR-INFO: Using files ${array_of_files[@]}"
+
+    # Use a temporary file for output
+    TMP_FILE=${output_dir}/tmp.tsv
+
+    # preprocess input files -> tmp.tsv
+    echo "IR-INFO: "
+    echo "IR-INFO: Histogram started at: `date`"
+    echo -n "IR-INFO: Working from directory: "
+    pwd
+    echo "IR-INFO: Output directory ${output_dir}"
+    echo "IR-INFO: Extracting ${variable_name}"
+    echo "IR-INFO: Extracting from ${array_of_files[@]}"
+    echo "IR-INFO: Extracting into $TMP_FILE"
+    echo ${variable_name} > $TMP_FILE
+    for filename in "${array_of_files[@]}"; do
+        echo "IR-INFO: Extracting ${variable_name} from $filename"
+        python3 ${GATEWAY_UTIL_DIR}/preprocess.py ${output_dir}/$filename ${variable_name} >> $TMP_FILE
+    done
+
+    # Generate the image file name.
+    OFILE_BASE="${file_tag}-${variable_name}"
+    PNG_OFILE=${output_dir}/${OFILE_BASE}-histogram.png
+    TSV_OFILE=${output_dir}/${OFILE_BASE}-histogram.tsv
+
+    # Debugging output
+    echo "IR-INFO: Input file = $TMP_FILE"
+    echo "IR-INFO: Variable = ${variable_name}"
+    echo "IR-INFO: Graph output file = $PNG_OFILE"
+    echo "IR-INFO: Data output file = $TSV_OFILE"
+
+    # Run the python histogram command
+    python3 /ireceptor/airr_histogram.py ${variable_name} $TMP_FILE $PNG_OFILE $TSV_OFILE ${SORT_VALUES} ${NUM_VALUES} "${title},${variable_name}"
+    if [ $?
-ne 0 ]
+    then
+        echo "IR-ERROR: Could not generate histogram for ${title}"
+        # Exit non-zero (note: $? here would be the status of the test
+        # above, not of the failed histogram command).
+        exit 1
+    fi
+
+    # change permissions
+    chmod 644 $PNG_OFILE
+    chmod 644 $TSV_OFILE
+
+    # Remove the temporary file.
+    #rm -f $TMP_FILE
+}
+
+function run_analysis()
+# Parameters:
+# $1 output directory
+# $2 repository name [string]
+# $3 repertoire_id ("NULL" if should skip repertoire processing)
+# $4 repertoire file (Not used if repertoire_id == NULL)
+# $5 manifest file
+{
+    # Use local variables - no scope issues please...
+    local output_directory=$1
+    local repository_name=$2
+    local repertoire_id=$3
+    local repertoire_file=$4
+    local manifest_file=$5
+    echo "IR-INFO: Running a Repertoire Analysis with manifest ${manifest_file}"
+    echo "IR-INFO: Working directory = ${output_directory}"
+    echo "IR-INFO: Repository name = ${repository_name}"
+    echo "IR-INFO: Repertoire id = ${repertoire_id}"
+    echo "IR-INFO: Repertoire file = ${repertoire_file}"
+    echo "IR-INFO: Manifest file = ${manifest_file}"
+    echo -n "IR-INFO: Current directory = "
+    pwd
+
+    # Get a list of rearrangement files to process from the manifest.
+    local array_of_files=( `python3 ${GATEWAY_UTIL_DIR}/manifest_summary.py ${manifest_file} "rearrangement_file"` )
+    if [ $? -ne 0 ]
+    then
+        echo "IR-ERROR: Could not process manifest file ${manifest_file}"
+        return
+    fi
+    echo "IR-INFO: Using files ${array_of_files[@]}"
+
+    # Check to see if we are processing a specific repertoire_id
+    if [ "${repertoire_id}" != "Total" ]; then
+        file_string=`python3 ${GATEWAY_UTIL_DIR}/repertoire_summary.py ${repertoire_file} ${repertoire_id} --separator "_"`
+        if [ $? -ne 0 ]
+        then
+            echo "IR-ERROR: Could not generate repertoire summary from ${repertoire_file}"
+            return
+        fi
+
+        title_string="$(python3 ${GATEWAY_UTIL_DIR}/repertoire_summary.py ${repertoire_file} ${repertoire_id})"
+        if [ $? -ne 0 ]
+        then
+            echo "IR-ERROR: Could not generate repertoire summary from ${repertoire_file}"
+            return
+        fi
+    else
+        file_string="Total"
+        title_string="Total"
+    fi
+    # Clean up special characters in file and title strings.
+    file_string=`echo ${repository_name}_${file_string} | sed "s/[!@#$%^&*() :/-]/_/g"`
+    # TODO: Fix this, it should not be required.
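+    # (Sketch of the effect with an illustrative value: a title of
+    # "PRJNA330606: 10x TR" becomes "PRJNA330606:10xTR" - only spaces are
+    # removed.)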
+    title_string=`echo ${title_string} | sed "s/[ ]//g"`
+    echo "IR-INFO: file_string = ${file_string}"
+    echo "IR-INFO: title_string = ${title_string}"
+
+    # Generate the histogram
+    echo "IR-INFO: Using files ${array_of_files[@]}"
+    do_histogram ${VARNAME} $output_directory $file_string $title_string ${array_of_files[@]}
+
+    # Remove the TSV files, we don't want to return them
+    for filename in "${array_of_files[@]}"; do
+        echo "IR-INFO: Not removing file $output_directory/$filename"
+        #rm -f $output_directory/$filename
+    done
+
+    # Generate a label file for the Gateway to use to present this info to the user
+    label_file=${output_directory}/${repertoire_id}.txt
+    echo "${title_string}" > ${label_file}
+
+    # Generate a summary HTML file for the Gateway to present this info to the user
+    html_file=${output_directory}/${repertoire_id}.html
+
+    # Generate the HTML main block
+    printf '<!DOCTYPE HTML>\n' > ${html_file}
+    printf '<html lang="en">' >> ${html_file}
+
+    # Generate a normal looking iReceptor header
+    printf '<head>\n' >> ${html_file}
+    cat ${output_directory}/assets/head-template.html >> ${html_file}
+    printf "<title>Histogram: %s</title>\n" ${title_string} >> ${html_file}
+    printf '</head>\n' >> ${html_file}
+
+    # Generate an iReceptor top bar for the page
+    cat ${output_directory}/assets/top-bar-template.html >> ${html_file}
+
+    # Generate a normal looking iReceptor header
+    printf '
<body><div class="container">' >> ${html_file}
+
+    printf "<h2>Histogram: %s</h2>\n" ${title_string} >> ${html_file}
+    printf "<h2>Analysis</h2>\n" >> ${html_file}
+    printf '<img src="%s-%s-histogram.png"/>\n' ${file_string} ${VARNAME} >> ${html_file}
+
+    # End of main div container
+    printf '</div>
' >> ${html_file} + + # Use the normal iReceptor footer. + cat ${output_directory}/assets/footer.html >> ${html_file} + + # Generate end body end HTML + printf '' >> ${html_file} + printf '' >> ${html_file} +} + +# Set up the required variables. An iReceptor Gateway download consists +# of both an "info.txt" file that describes the download as well as an +# AIRR manifest JSON file that describes the relationships between +# AIRR Repertoire JSON files and AIRR TSV files. +INFO_FILE="info.txt" +MANIFEST_FILE="AIRR-manifest.json" + +if [ "${SPLIT_REPERTOIRE}" = "True" ]; then + echo -e "IR-INFO: \nIR-INFO: Splitting data by Repertoire\n" + # Split the download into single repertoire files, with a directory per + # repository and within that a directory per repertoire. This expects the + # user to define a function called run_analysis() that will be + # called for each repertoire. See the docs in the gateway_utilities.sh file + # for parameters to this function. + gateway_split_repertoire ${INFO_FILE} ${MANIFEST_FILE} ${ZIP_FILE} ${GATEWAY_ANALYSIS_DIR} + gateway_run_analysis ${INFO_FILE} ${MANIFEST_FILE} ${GATEWAY_ANALYSIS_DIR} + gateway_cleanup ${ZIP_FILE} ${MANIFEST_FILE} ${GATEWAY_ANALYSIS_DIR} + + +elif [ "${SPLIT_REPERTOIRE}" = "False" ]; then + echo -e "IR-INFO: \nIR-INFO: Running app on entire data set\n" + # Output directory is called "Total" + # Run the analysis with a token repository name of "ADC" since the + # analysis is being run on data from the entire ADC. + # repertoire_id and repository should be "NULL" + # Lastly, provide the list of TSV files to process. Remember that + # the array elements are expanded into separate parameters, which + # the run_analyis function handles. + outdir="Total" + + # Run the stats on all the data combined. Unzip the files + gateway_unzip ${ZIP_FILE} ${GATEWAY_ANALYSIS_DIR}/${outdir} + + # Copy the HTML resources for the Apps + echo "IR-INFO: Copying HTML assets" + mkdir -p ${GATEWAY_ANALYSIS_DIR}/${outdir}/assets + cp -r ${GATEWAY_UTIL_DIR}/assets/* ${GATEWAY_ANALYSIS_DIR}/${outdir}/assets + if [ $? -ne 0 ] + then + echo "IR-ERROR: Could not create HTML asset directory" + fi + + # Run the Histogram analysis. + run_analysis ${GATEWAY_ANALYSIS_DIR}/${outdir} "AIRRDataCommons" ${outdir} "NULL" ${GATEWAY_ANALYSIS_DIR}/${outdir}/${MANIFEST_FILE} + + # Copy the INFO_FILE to the analysis DIR as the Gateway expects it to be there. + cp ${GATEWAY_ANALYSIS_DIR}/${outdir}/${INFO_FILE} ${GATEWAY_ANALYSIS_DIR}/ + + +else + echo "IR-ERROR: Unknown repertoire operation ${SPLIT_REPERTOIRE}" >&2 + exit 1 +fi + +# Make sure we are back where we started, although the gateway functions should +# not change the working directory that we are in. +cd ${SCRIPT_DIR} + +# We want to move the info.txt to the main directory as the Gateway uses it if +# it is available. +cp ${GATEWAY_ANALYSIS_DIR}/${INFO_FILE} . + +# We want to keep the job error and output files as part of the analysis output. +#cp *.err ${GATEWAY_ANALYSIS_DIR} +cp output/*.out ${GATEWAY_ANALYSIS_DIR} + +# ZIP up the analysis results for easy download +zip -r ${GATEWAY_ANALYSIS_DIR}.zip ${GATEWAY_ANALYSIS_DIR} + +# We don't want the iReceptor Utilities to be part of the results. 
+echo "IR-INFO: Removing Gateway utilities" +#rm -rf ${GATEWAY_UTIL_DIR} + +# We don't want the analysis files to remain - they are in the ZIP file +echo "IR-INFO: Removing analysis output" +#rm -rf ${GATEWAY_ANALYSIS_DIR} + +# Cleanup the input data files, don't want to return them as part of the resulting analysis +echo "IR-INFO: Removing original ZIP file $ZIP_FILE" +#rm -f $ZIP_FILE + +# Debugging output, print data/time when shell command is finished. +echo "IR-INFO: Histogram finished at: `date`" + From df83e8cd0ed82d6f11cb7aac46ed09f0215bb6e9 Mon Sep 17 00:00:00 2001 From: Brian Corrie Date: Fri, 10 Mar 2023 10:07:41 -0800 Subject: [PATCH 017/221] Initial Statistics App shell script --- resources/agave_apps/stats/app3.sh | 408 +++++++++++++++++++++++++++++ 1 file changed, 408 insertions(+) create mode 100644 resources/agave_apps/stats/app3.sh diff --git a/resources/agave_apps/stats/app3.sh b/resources/agave_apps/stats/app3.sh new file mode 100644 index 00000000..eabb3b2a --- /dev/null +++ b/resources/agave_apps/stats/app3.sh @@ -0,0 +1,408 @@ +#!/bin/bash +# +# Wrapper script for running app through the iReceptor Gateway. +# + +echo "IR-INFO: iReceptor Statistics App - starting at: `date`" + +# Get the script directory where all the code is. +SCRIPT_DIR=${_tapisExecSystemExecDir} +echo "IR-INFO: Running job from ${SCRIPT_DIR}" +ls / + +# +# Tapis App Parameters: Will be on the singularity command line to +# the App. +# +split_repertoire=$1 + +# Tapis parameter ir_gateway_url contains the URL of the source gateway. Use +# this to gather iReceptor Gateway specific resources if needed. +# +# Tapis ENV variables expected +# +ZIP_FILE=${download_file} +# Tapis parameter ir_gateway_url contains the URL of the source gateway. Use +# this to gather iReceptor Gateway specific resources if needed. +GATEWAY_URL="${ir_gateway_url}" + +############################################## +# Set up Gateway Utilities +############################################## +echo "IR-INFO: Using Gateway ${GATEWAY_URL}" + +# Report where we get the Gateway utilities from +GATEWAY_UTIL_DIR=${gateway_util_dir} +echo "IR-INFO: Using iReceptor Gateway Utilities from ${GATEWAY_UTIL_DIR}" + +# Load the iReceptor Gateway utilities functions. +source ${GATEWAY_UTIL_DIR}/gateway_utilities.sh +if [ $? -ne 0 ] +then + echo "IR-ERROR: Could not load GATEWAY UTILIIES" + exit 1 +fi + +# This directory is defined in the gateway_utilities.sh. The Gateway +# relies on this being set. If it isn't set, abort as something has +# gone wrong with loading the Gateway utilties. +echo "IR-INFO: Gateway analysis directory = ${GATEWAY_ANALYSIS_DIR}" +if [ -z "${GATEWAY_ANALYSIS_DIR}" ]; then + echo "IR-ERROR: GATEWAY_ANALYSIS_DIR not defined, gateway_utilities not loaded correctly." >&2 + exit 1 +fi +echo "IR-INFO: Done loading iReceptor Gateway Utilities" + +# Start +printf "IR-INFO:\n" +printf "IR-INFO: START at $(date)\n" +printf "IR-INFO: PROCS = ${AGAVE_JOB_PROCESSORS_PER_NODE}\n" +printf "IR-INFO: MEM = ${AGAVE_JOB_MEMORY_PER_NODE}\n" +printf "IR-INFO: SLURM JOB ID = ${SLURM_JOB_ID}\n" +printf "IR-INFO: \n" + +######################################################################### +# Code to do the analysis +######################################################################### +function do_heatmap() +# $1,$2 are variable names to process +# $3 output directory +# $4 name of processing object (use to tag file) +# $5 title of processing object (use in title of graph) +# $6-$N remaining arguments are files to process. 
+{ + # Get the local variables to use + local variable1=$1 + local variable2=$2 + local output_dir=$3 + local file_tag=$4 + local title=$5 + shift + shift + shift + shift + shift + # Remaining variable are the files to process + local array_of_files=( $@ ) + # Temporary file for data + TMP_FILE=${output_dir}/tmp.tsv + + # preprocess input files -> tmp.csv + echo "IR-INFO: Extracting ${variable1} and ${variable2} from files started at: `date`" + #rm -f $TMP_FILE + echo -e "${variable1}\t${variable2}" > $TMP_FILE + + for filename in "${array_of_files[@]}"; do + echo "IR-INFO: Extracting ${variable1} and ${variable2} from $filename" + # Get the columns numbers for the column labels of interest. + x_column=`cat ${output_dir}/$filename | head -n 1 | awk -F"\t" -v label=${variable1} '{for(i=1;i<=NF;i++){if ($i == label){print i}}}'` + y_column=`cat ${output_dir}/$filename | head -n 1 | awk -F"\t" -v label=${variable2} '{for(i=1;i<=NF;i++){if ($i == label){print i}}}'` + echo "IR-INFO: Columns = ${x_column}, ${y_column}" + + # Extract the two columns of interest. In the case of VDJ calls, we want the gene + # (not including the allele). As a result we chop things off at the first star. + # This also takes care of the case where a gene call has multiple calls. Since + # we drop everthing after the first allele we drop all of the other calls as well. + cat ${output_dir}/$filename | cut -f $x_column,$y_column | awk -v xlabel=${variable1} -v ylabel=${variable2} 'BEGIN {FS="\t"} /IG|TR/ {if (index($1,"*") == 0) {xstr = $1} else {xstr=substr($1,0,index($1,"*")-1)};if (index($2,"*") == 0) {ystr = $2} else {ystr=substr($2,0,index($2,"*")-1)};printf("%s\t%s\n",xstr,ystr)}' >> $TMP_FILE + done + # Generate a set of unique values that we can generate the heatmap on. This is a comma separated + # list of unique gene names for each of the two fields of interest. + xvals=`cat $TMP_FILE | cut -f 1 | awk 'BEGIN {FS=","} {if (NR>1) print($1)}' | sort | uniq | awk '{if (NR>1) printf(",%s", $1); else printf("%s", $1)}'` + yvals=`cat $TMP_FILE | cut -f 2 | awk 'BEGIN {FS=","} {if (NR>1) print($1)}' | sort | uniq | awk '{if (NR>1) printf(",%s", $1); else printf("%s", $1)}'` + + # Finally we generate a heatmap given all of the processed information. + echo "IR-INFO: ${variable1}" + echo "IR-INFO: ${variable2}" + echo "IR-INFO: $xvals" + echo "IR-INFO: $yvals" + PNG_OFILE=${output_dir}/${file_tag}-${variable1}-${variable2}-heatmap.png + TSV_OFILE=${output_dir}/${file_tag}-${variable1}-${variable2}-heatmap.tsv + + # Generate the heatmap + python3 /ireceptor/airr_heatmap.py ${variable1} ${variable2} $xvals $yvals $TMP_FILE $PNG_OFILE $TSV_OFILE "${title}(${variable1},${variable2})" + if [ $? -ne 0 ] + then + echo "IR-ERROR: Could not generate heatmap for ${variable1},${variable2}" + # Remove the temporary file. + #rm -f $TMP_FILE + return + fi + + # change permissions + chmod 644 "$PNG_OFILE" + chmod 644 "$TSV_OFILE" + + # Remove the temporary file. + #rm -f $TMP_FILE +} + +function do_histogram() +# Parameters: +# $1 is variable_name to process +# $2 output directory +# $3 name of processing object (use to tag file) +# $4 title of processing object (use in title of graph) +# $5-$N remaining arguments are files to process. 
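+# Example invocation (a sketch with illustrative arguments):
+#     do_histogram junction_aa_length ${outdir} ipa1_PRJNA330606 PRJNA330606 f1.tsv
+# which would write ${outdir}/ipa1_PRJNA330606-junction_aa_length-histogram.png
+# and the matching .tsv data file.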
+function do_histogram()
+# Parameters:
+#     $1 is variable_name to process
+#     $2 output directory
+#     $3 name of processing object (use to tag file)
+#     $4 title of processing object (use in title of graph)
+#     $5-$N remaining arguments are files to process.
+{
+    # Get the local variables to use
+    local variable_name=$1
+    local output_dir=$2
+    local file_tag=$3
+    local title=$4
+    shift
+    shift
+    shift
+    shift
+    # Remaining variables are the files to process
+    local array_of_files=( $@ )
+
+    # Temporary file for data
+    TMP_FILE=${output_dir}/tmp.tsv
+
+    # preprocess input files -> tmp.tsv
+    echo ""
+    echo "IR-INFO: Histogram started at: `date`"
+    echo -n "IR-INFO: Working from directory: "
+    pwd
+    echo "IR-INFO: Extracting ${variable_name} from files started at: `date`"
+    rm -f $TMP_FILE
+    echo ${variable_name} > $TMP_FILE
+    for filename in "${array_of_files[@]}"; do
+        echo "IR-INFO: Extracting ${variable_name} from $filename"
+        python3 ${GATEWAY_UTIL_DIR}/preprocess.py ${output_dir}/$filename ${variable_name} >> $TMP_FILE
+    done
+
+    # Generate the image file.
+    OFILE_BASE="${file_tag}-${variable_name}"
+    PNG_OFILE=${output_dir}/${OFILE_BASE}-histogram.png
+    TSV_OFILE=${output_dir}/${OFILE_BASE}-histogram.tsv
+
+    # Debugging output
+    echo "IR-INFO: Input file = $TMP_FILE"
+    echo "IR-INFO: Variable = ${variable_name}"
+    echo "IR-INFO: Graph output file = $PNG_OFILE"
+    echo "IR-INFO: Data output file = $TSV_OFILE"
+
+    # Run the python histogram command. We don't want to sort based on value
+    # (sort_values = False) and we use a num_values of 0, which denotes all values.
+    python3 /ireceptor/airr_histogram.py ${variable_name} $TMP_FILE $PNG_OFILE $TSV_OFILE False 0 "${title},${variable_name}"
+    if [ $? -ne 0 ]
+    then
+        echo "IR-ERROR: Could not generate histogram for ${variable_name}"
+        # Remove the temporary file.
+        #rm -f $TMP_FILE
+        return
+    fi
+
+    # change permissions
+    chmod 644 $PNG_OFILE
+    chmod 644 $TSV_OFILE
+
+    # Remove the temporary file.
+    #rm -f $TMP_FILE
+}
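+
+# A matching usage sketch for do_histogram (values again illustrative): a
+# v_call usage histogram over the same files, producing
+# ${dir}/rep1-v_call-histogram.png and ${dir}/rep1-v_call-histogram.tsv:
+#
+#   do_histogram v_call ${dir} rep1 "Repertoire 1" file1.tsv file2.tsv
+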
+function run_analysis()
+# Parameters:
+#     $1 output directory
+#     $2 repository name [string]
+#     $3 repertoire_id ("NULL" if should skip repertoire processing)
+#     $4 repertoire file (Not used if repertoire_id == NULL)
+#     $5 manifest file
+{
+    # Use local variables - no scope issues please...
+    local output_directory=$1
+    local repository_name=$2
+    local repertoire_id=$3
+    local repertoire_file=$4
+    local manifest_file=$5
+    echo "IR-INFO: Running a Repertoire Analysis with manifest ${manifest_file}"
+    echo "IR-INFO: Working directory = ${output_directory}"
+    echo "IR-INFO: Repository name = ${repository_name}"
+    echo "IR-INFO: Repertoire id = ${repertoire_id}"
+    echo "IR-INFO: Repertoire file = ${repertoire_file}"
+    echo "IR-INFO: Manifest file = ${manifest_file}"
+    echo -n "IR-INFO: Current directory = "
+    pwd
+
+    # Get a list of rearrangement files to process from the manifest.
+    local array_of_files=( `python3 ${GATEWAY_UTIL_DIR}/manifest_summary.py ${manifest_file} "rearrangement_file"` )
+    if [ $? -ne 0 ]
+    then
+        echo "IR-ERROR: Could not process manifest file ${manifest_file}"
+        return
+    fi
+    echo "IR-INFO: Using files ${array_of_files[@]}"
+
+    # Check to see if we are processing a specific repertoire_id
+    if [ "${repertoire_id}" != "Total" ]; then
+        file_string=`python3 ${GATEWAY_UTIL_DIR}/repertoire_summary.py ${repertoire_file} ${repertoire_id} --separator "_"`
+        if [ $? -ne 0 ]
+        then
+            echo "IR-ERROR: Could not generate repertoire summary from ${repertoire_file}"
+            return
+        fi
+        title_string="$(python3 ${GATEWAY_UTIL_DIR}/repertoire_summary.py ${repertoire_file} ${repertoire_id})"
+        if [ $? -ne 0 ]
+        then
+            echo "IR-ERROR: Could not generate repertoire summary from ${repertoire_file}"
+            return
+        fi
+
+    else
+        file_string="${repertoire_id}"
+        title_string="${repertoire_id}"
+    fi
+
+    # Clean up special characters in file and title strings.
+    file_string=`echo ${repository_name}_${file_string} | sed "s/[!@#$%^&*() :/-]/_/g"`
+    # TODO: Fix this, it should not be required.
+    title_string=`echo ${title_string} | sed "s/[ ]//g"`
+
+    # Generate the histogram and heatmap stats
+    do_histogram v_call $output_directory $file_string $title_string ${array_of_files[@]}
+    do_histogram d_call $output_directory $file_string $title_string ${array_of_files[@]}
+    do_histogram j_call $output_directory $file_string $title_string ${array_of_files[@]}
+    do_histogram junction_aa_length $output_directory $file_string $title_string ${array_of_files[@]}
+    do_heatmap v_call j_call $output_directory $file_string $title_string ${array_of_files[@]}
+    do_heatmap v_call junction_aa_length $output_directory $file_string $title_string ${array_of_files[@]}
+    # Remove the TSV files, we don't want to return them
+    for filename in "${array_of_files[@]}"; do
+        echo "IR-INFO: Removing data file $output_directory/$filename"
+        #rm -f $output_directory/$filename
+    done
+
+    # Generate a label file for the Gateway to use to present this info to the user
+    label_file=${output_directory}/${repertoire_id}.txt
+    echo "${title_string}" > ${label_file}
+
+    # Generate a summary HTML file for the Gateway to present this info to the user
+    html_file=${output_directory}/${repertoire_id}.html
+
+    # Generate the HTML main block
+    printf '<html>\n' > ${html_file}
+    printf '<body>' >> ${html_file}
+
+    # Generate a normal looking iReceptor header
+    printf '<head>\n' >> ${html_file}
+    cat ${output_directory}/assets/head-template.html >> ${html_file}
+    printf "<title>Stats: %s</title>\n" ${title_string} >> ${html_file}
+    printf '</head>\n' >> ${html_file}
+
+    # Generate an iReceptor top bar for the page
+    cat ${output_directory}/assets/top-bar-template.html >> ${html_file}
+
+    # Generate the main container for the page content
+    printf '<div>' >> ${html_file}
+
+    printf "<h2>Stats: %s</h2>\n" ${title_string} >> ${html_file}
+    printf "<h3>Analysis</h3>\n" >> ${html_file}
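+    # The images referenced below are the files generated by the do_histogram
+    # and do_heatmap calls above, which name their outputs
+    # ${file_tag}-${variable}-histogram.png and
+    # ${file_tag}-${variable1}-${variable2}-heatmap.png respectively.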
+    printf "<h4>V/J gene usage heatmap</h4>\n" >> ${html_file}
+    printf '<img src="%s-v_call-j_call-heatmap.png">' ${file_string} >> ${html_file}
+    printf "<h4>V gene/Junction AA Length heatmap</h4>\n" >> ${html_file}
+    printf '<img src="%s-v_call-junction_aa_length-heatmap.png">' ${file_string} >> ${html_file}
+    printf "<h4>V Gene usage</h4>\n" >> ${html_file}
+    printf '<img src="%s-v_call-histogram.png">' ${file_string} >> ${html_file}
+    printf "<h4>D Gene usage</h4>\n" >> ${html_file}
+    printf '<img src="%s-d_call-histogram.png">' ${file_string} >> ${html_file}
+    printf "<h4>J Gene usage</h4>\n" >> ${html_file}
+    printf '<img src="%s-j_call-histogram.png">' ${file_string} >> ${html_file}
+    printf "<h4>Junction AA Length</h4>\n" >> ${html_file}
+    printf '<img src="%s-junction_aa_length-histogram.png">' ${file_string} >> ${html_file}
+    # End of main div container
+    printf '</div>' >> ${html_file}
+
+    # Use the normal iReceptor footer.
+    cat ${output_directory}/assets/footer.html >> ${html_file}
+
+    # Generate end body end HTML
+    printf '</body>' >> ${html_file}
+    printf '</html>' >> ${html_file}
+
+}
+
+# Set up the required variables. An iReceptor Gateway download consists
+# of both an "info.txt" file that describes the download as well as an
+# AIRR manifest JSON file that describes the relationships between
+# AIRR Repertoire JSON files and AIRR TSV files.
+INFO_FILE="info.txt"
+AIRR_MANIFEST_FILE="AIRR-manifest.json"
+
+if [ "${split_repertoire}" = "True" ]; then
+    echo -e "IR-INFO:\nIR-INFO: Splitting data by Repertoire"
+    echo "IR-INFO:"
+    # Split the download into single repertoire files, with a directory per
+    # repository and within that a directory per repertoire. This expects the
+    # user to define a function called run_analysis() that will be
+    # called for each repertoire. See the docs in the gateway_utilities.sh file
+    # for parameters to this function.
+    gateway_split_repertoire ${INFO_FILE} ${AIRR_MANIFEST_FILE} ${ZIP_FILE} ${GATEWAY_ANALYSIS_DIR}
+    gateway_run_analysis ${INFO_FILE} ${AIRR_MANIFEST_FILE} ${GATEWAY_ANALYSIS_DIR}
+    gateway_cleanup ${ZIP_FILE} ${AIRR_MANIFEST_FILE} ${GATEWAY_ANALYSIS_DIR}
+
+elif [ "${split_repertoire}" = "False" ]; then
+    echo -e "IR-INFO:\nIR-INFO: Running app on entire data set"
+    echo "IR-INFO:"
+
+    # Output directory is called "Total".
+    # Run the analysis with a token repository name of "AIRRDataCommons" since
+    # the analysis is being run on data from the entire AIRR Data Commons.
+    # The repertoire_id is the token "Total" (the output directory name) and
+    # the repertoire file is "NULL" since there is no single repertoire.
+    # Lastly, provide the manifest file that describes the data files to
+    # process, which the run_analysis function handles.
+    outdir="Total"
+
+    # Copy the HTML resources for the Apps
+    echo "IR-INFO: Copying HTML assets"
+    mkdir -p ${GATEWAY_ANALYSIS_DIR}/${outdir}/assets
+    cp -r ${GATEWAY_UTIL_DIR}/assets/* ${GATEWAY_ANALYSIS_DIR}/${outdir}/assets
+    if [ $? -ne 0 ]
+    then
+        echo "IR-ERROR: Could not create HTML asset directory"
+    fi
+
+    # Run the stats on all the data combined. Unzip the files first.
+    gateway_unzip ${ZIP_FILE} ${GATEWAY_ANALYSIS_DIR}/${outdir}
+
+    # Run the stats analysis.
+    run_analysis ${GATEWAY_ANALYSIS_DIR}/${outdir} "AIRRDataCommons" ${outdir} "NULL" ${GATEWAY_ANALYSIS_DIR}/${outdir}/${AIRR_MANIFEST_FILE}
+
+    # Copy the INFO_FILE to the analysis DIR as the Gateway expects it to be there.
+    cp ${GATEWAY_ANALYSIS_DIR}/${outdir}/${INFO_FILE} ${GATEWAY_ANALYSIS_DIR}/
+
+else
+    echo "IR-ERROR: Unknown repertoire operation ${split_repertoire}" >&2
+    exit 1
+fi
+
+# Make sure we are back where we started, although the gateway functions should
+# not change the working directory that we are in.
+cd ${SCRIPT_DIR}
+
+# We want to move the info.txt to the main directory as the Gateway uses it if
+# it is available.
+cp ${GATEWAY_ANALYSIS_DIR}/${INFO_FILE} .
+
+# We want to keep the job error and output files as part of the analysis output.
+cp output/*.out ${GATEWAY_ANALYSIS_DIR}
+
+# ZIP up the analysis results for easy download
+zip -r ${GATEWAY_ANALYSIS_DIR}.zip ${GATEWAY_ANALYSIS_DIR}
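+
+# At this point ${GATEWAY_ANALYSIS_DIR}, and the ZIP of it that is returned
+# to the Gateway, is laid out roughly as follows for a split-repertoire run
+# (names illustrative, one directory per repository and within that one
+# directory per repertoire):
+#
+#   ${GATEWAY_ANALYSIS_DIR}/
+#       info.txt
+#       repository1/repertoire1/
+#           repertoire1.txt    (label file)
+#           repertoire1.html   (summary page)
+#           rep1-v_call-histogram.png/.tsv, rep1-v_call-j_call-heatmap.png/.tsv, ...
+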
+echo "IR-INFO: Removing Gateway utilities" +#rm -rf ${GATEWAY_UTIL_DIR} + +# We don't want the analysis files to remain - they are in the ZIP file +echo "IR-INFO: Removing analysis output" +#rm -rf ${GATEWAY_ANALYSIS_DIR} + +# Cleanup the input data files, don't want to return them as part of the resulting analysis +echo "IR-INFO: Removing original ZIP file $ZIP_FILE" +#rm -f $ZIP_FILE + +# Debugging output, print data/time when shell command is finished. +echo "IR-INFO: Statistics finished at: `date`" + From 4e968d575f17c59815dde819ca0a6d8c33c1aa77 Mon Sep 17 00:00:00 2001 From: bcorrie Date: Thu, 23 Mar 2023 23:08:21 +0000 Subject: [PATCH 018/221] Changed so email uses User class. Tested downloads, works with new implementation. --- app/Jobs/DownloadSequences.php | 68 +++++++++++++++++----------------- 1 file changed, 35 insertions(+), 33 deletions(-) diff --git a/app/Jobs/DownloadSequences.php b/app/Jobs/DownloadSequences.php index bcabc145..434d3042 100644 --- a/app/Jobs/DownloadSequences.php +++ b/app/Jobs/DownloadSequences.php @@ -2,7 +2,6 @@ namespace App\Jobs; -use App\Agave; use App\Download; use App\LocalJob; use App\Query; @@ -125,25 +124,27 @@ public function handle() $this->download->save(); // send notification email - $agave = new Agave; - $token = $agave->getAdminToken(); - $user = $agave->getUserWithUsername($this->username, $token); - $email = $user->email; - - $date_str = $this->download->createdAtShort(); - - $t = []; - $t['page_url'] = config('app.url') . $this->download->page_url; - $t['file_url'] = config('app.url') . '/' . $this->download->file_url; - $t['download_page_url'] = config('app.url') . '/downloads'; - $t['download_days_available'] = self::DAYS_AVAILABLE; - $t['date_str'] = $date_str; - - Mail::send(['text' => 'emails.download_successful'], $t, function ($message) use ($email, $date_str) { - Log::debug('test1'); - Log::debug($email); - $message->to($email)->subject('[iReceptor] Your download from ' . $date_str . ' is ready'); - }); + $user = User::where('username', $this->username)->first(); + if ($user != null && $user->email != '') { + $email = $user->email; + + $date_str = $this->download->createdAtShort(); + + $t = []; + $t['page_url'] = config('app.url') . $this->download->page_url; + $t['file_url'] = config('app.url') . '/' . $this->download->file_url; + $t['download_page_url'] = config('app.url') . '/downloads'; + $t['download_days_available'] = self::DAYS_AVAILABLE; + $t['date_str'] = $date_str; + + Mail::send(['text' => 'emails.download_successful'], $t, function ($message) use ($email, $date_str) { + Log::debug('test1'); + Log::debug($email); + $message->to($email)->subject('[iReceptor] Your download from ' . $date_str . ' is ready'); + }); + } else { + Log::error('Error email not send. Could not find email for user ' . $this->username); + } if ($this->download->incomplete) { // email notification to iReceptor support @@ -178,19 +179,21 @@ public function failed(Throwable $e) $this->download->save(); // email notification to user - $agave = new Agave; - $token = $agave->getAdminToken(); - $user = $agave->getUserWithUsername($this->username, $token); - $email = $user->email; + $user = User::where('username', $this->username)->first(); + if ($user != null && $user->email != '') { + $email = $user->email; - $t = []; - $t['page_url'] = config('app.url') . $this->download->page_url; - $t['download_page_url'] = config('app.url') . 
-        $t['support_email'] = config('ireceptor.email_support');
-
-        Mail::send(['text' => 'emails.download_failed'], $t, function ($message) use ($email) {
-            $message->to($email)->subject('[iReceptor] Download error');
-        });
+            $t = [];
+            $t['page_url'] = config('app.url') . $this->download->page_url;
+            $t['download_page_url'] = config('app.url') . '/downloads';
+            $t['support_email'] = config('ireceptor.email_support');
+
+            Mail::send(['text' => 'emails.download_failed'], $t, function ($message) use ($email) {
+                $message->to($email)->subject('[iReceptor] Download error');
+            });
+        } else {
+            Log::error('Error email not sent. Could not find email for user ' . $this->username);
+        }
 
         $error_message = $e->getMessage();
         $query_log_id = QueryLog::get_query_log_id();
@@ -208,7 +211,6 @@
             $message->to(config('ireceptor.email_support'))->subject('Gateway Download Error for ' . $username);
         });
     }
-
     QueryLog::end_job($query_log_id, 'error', $error_message);
 }
 }

From 28a7abf79ef9394004737085acaeae2365a52755 Mon Sep 17 00:00:00 2001
From: bcorrie
Date: Fri, 24 Mar 2023 20:13:47 +0000
Subject: [PATCH 019/221] Experiments

---
 app/Http/Controllers/TestController.php | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/app/Http/Controllers/TestController.php b/app/Http/Controllers/TestController.php
index 751e2b48..5800b4e7 100644
--- a/app/Http/Controllers/TestController.php
+++ b/app/Http/Controllers/TestController.php
@@ -2,7 +2,7 @@
 
 namespace App\Http\Controllers;
 
-use App\Agave;
+use App\Tapis;
 use App\Download;
 use App\FieldName;
 use App\Job;
@@ -32,6 +32,14 @@ public function getIndex(Request $request)
 //        echo "done";
 //        die();
 
+        // Generate password
+        // echo Hash::make('foobar');die();
+
+        // Test Tapis
+        $tapis = new Tapis;
+        die;
+
+
         // run FIRST
         // retrieve users from Agave
         $agave = new Agave;

From 690a9dc9cf1f833aad15c1f669ee61a3aa11d8d9 Mon Sep 17 00:00:00 2001
From: bcorrie
Date: Fri, 24 Mar 2023 22:31:38 +0000
Subject: [PATCH 020/221] Initial Tapis3 refactor

AGAVE references removed
Tapis class created
Tapis URLs updated to Tapis3
Tapis3 tokens being created successfully
Initial query for job ID working with Tapis 3 token
Query fails, because Job ID is a Tapis 2 job.
---
 app/Agave.php                               | 799 --------------------
 app/Http/Controllers/AdminController.php    |  10 +-
 app/Http/Controllers/CellController.php     |   8 +-
 app/Http/Controllers/CloneController.php    |   8 +-
 app/Http/Controllers/JobController.php      | 153 ++--
 app/Http/Controllers/SequenceController.php |   8 +-
 app/Http/Controllers/SystemController.php   |  52 +-
 app/Http/Controllers/UserController.php     |  22 +-
 app/Http/Controllers/UtilController.php     |  10 +-
 app/Job.php                                 |  26 +-
 app/Jobs/LaunchAgaveJob.php                 | 265 -------
 app/Jobs/ProcessAgaveNotification.php       |  61 --
 app/System.php                              |  60 +-
 app/User.php                                |  30 +-
 config/services.php                         |  65 ++
 routes/api.php                              |   2 +-
 16 files changed, 272 insertions(+), 1307 deletions(-)
 delete mode 100644 app/Agave.php
 delete mode 100644 app/Jobs/LaunchAgaveJob.php
 delete mode 100644 app/Jobs/ProcessAgaveNotification.php

diff --git a/app/Agave.php b/app/Agave.php
deleted file mode 100644
index d09d6b3d..00000000
--- a/app/Agave.php
+++ /dev/null
@@ -1,799 +0,0 @@
-        $this->initGuzzleRESTClient();
-
-        // Update the AppTemplates at start up.
-        $this->updateAppTemplates();
-
-        // Maximum run time for a job in hours.
-        //$this->maxRunTime = 4;
-        $this->maxRunTime = config('services.agave.system_execution.max_run_time');
-        // Maximum number of processors per job.
For now all serial jobs. - //$this->processorsPerNode = 1; - $this->processorsPerNode = config('services.agave.system_execution.processors_per_node'); - // Amount of memory per processor (in GB) - //$this->memoryPerProcessor = 8; - $this->memoryPerProcessor = config('services.agave.system_execution.memory_per_processor'); - - // Set up the default job contorl parameters used by AGAVE - $this->jobParameters = []; - // Run time parameter - $job_parameter = []; - $job_parameter['label'] = 'maxRunTime'; - $job_parameter['type'] = 'string'; - $job_parameter['name'] = 'Maximum run time (hh:mm:ss)'; - $job_parameter['description'] = 'Maximum run time for the job in hh:mm:ss. If the job takes longer than this to complete, the job will be terminated. A run time of longer than ' . strval($this->maxRunTime) . ' hours is not allowed.'; - $job_parameter['default'] = strval($this->maxRunTime) . ':00:00'; - $this->jobParameters[$job_parameter['label']] = $job_parameter; - - // Processors per node parameter - $job_parameter = []; - $job_parameter['label'] = 'processorsPerNode'; - $job_parameter['type'] = 'integer'; - $job_parameter['name'] = 'Number of CPUs (max ' . strval($this->processorsPerNode) . ')'; - $job_parameter['description'] = 'Number of CPUs used by the job, with a maximum of ' . strval($this->processorsPerNode) . ' per job. Note not all jobs will scale well so adding more CPUs may not reduce execution time.'; - $job_parameter['default'] = $this->processorsPerNode; - $this->jobParameters[$job_parameter['label']] = $job_parameter; - - // Memory per node parameter - // This doesn't seem to be working. Tapis does not seem to pass this on to - // the scheduler, so changing this has no effect. We go with 4GB/CPU through - // the default system config with a custom directive. - //$job_parameter = []; - //$job_parameter['label'] = 'memoryPerNode'; - //$job_parameter['type'] = 'string'; - //#$job_parameter['name'] = 'Memory per node'; - //$job_parameter['description'] = 'Amount of memory allocated per node used by the job'; - //$job_parameter['default'] = '4GB'; - //$this->jobParameters[$job_parameter['label']] = $job_parameter; - - // Number of nodes to use parameter - // We don't want to have jobs cross nodes, so we limit to one only - // Leaving this here in case we want to change that... 
- //$job_parameter = []; - //$job_parameter['label'] = 'nodeCount'; - //$job_parameter['type'] = 'integer'; - //$job_parameter['name'] = 'Number of nodes'; - //$job_parameter['description'] = 'Number of nodes used by the job'; - //$job_parameter['default'] = 1; - //$this->jobParameters[$job_parameter['label']] = $job_parameter; - } - - public function maxRunTime() - { - return $this->maxRunTime; - } - - public function processorsPerNode() - { - return $this->processorsPerNode; - } - - public function memoryPerProcessor() - { - return $this->memoryPerProcessor; - } - - public function isUp() - { - $url = config('services.agave.tenant_url'); - $apiKey = config('services.agave.api_key'); - $apiSecret = config('services.agave.api_token'); - - // user created specifically to test if AGAVE is up - $username = config('services.agave.test_user_username'); - $password = config('services.agave.test_user_password'); - - // try to get OAuth token - $t = $this->getToken($url, $username, $password, $apiKey, $apiSecret); - - return $t != null; - } - - public function getTokenForUser($username, $password) - { - $url = config('services.agave.tenant_url'); - $apiKey = config('services.agave.api_key'); - $apiSecret = config('services.agave.api_token'); - - // try to get token - Log::debug('Agave::getTokenForUser - Trying to get token for user: ' . $username); - $t = $this->getToken($url, $username, $password, $apiKey, $apiSecret); - Log::debug('Agave::getTokenForUser - Token info for user ' . $username . ' = ' . json_encode($t)); - - // return NULL or array with token and refresh token - return $t; - } - - public function getToken($url, $username, $password, $api_key, $api_secret) - { - Log::debug('Agave::getToken for ' . $username); - $headers = []; - $headers['Content-Type'] = 'application/x-www-form-urlencoded'; - - $auth = [$api_key, $api_secret]; - - $params = []; - $params['grant_type'] = 'password'; - $params['username'] = $username; - $params['password'] = $password; - $params['scope'] = 'PRODUCTION'; - - try { - // Normal respsonse from Tapis is: - // {"scope":"default","token_type":"bearer", - // "expires_in":14400, - // "refresh_token":"4e6e8a38f0a33f2cff7fe0318fe314db", - // "access_token":"8485748fbaa9a36efe941d8f3c36c2a1"} - $response = $this->client->request('POST', '/token', ['auth' => $auth, 'headers' => $headers, 'form_params' => $params, 'timeout' => 10]); - - $response = json_decode($response->getBody()); - Log::debug('Agave::getToken: respsonse = ' . json_encode($response)); - $this->raiseExceptionIfAgaveError($response); - } catch (ClientException $e) { - Log::debug('Agave::getToken - A ClientException occurred while getting a token from Agave:'); - Log::debug('Agave::getToken - ' . $e); - - return; - } - - Log::debug('Agave::getToken: returning respsonse = ' . json_encode($response)); - - return $response; - } - - public function getAdminToken() - { - $url = config('services.agave.tenant_url'); - $apiKey = config('services.agave.api_key'); - $apiSecret = config('services.agave.api_token'); - - // admin user allowed to create user accounts - $username = config('services.agave.admin_username'); - $password = config('services.agave.admin_password'); - - $t = $this->getToken($url, $username, $password, $apiKey, $apiSecret); - - return $t->access_token; - } - - public function renewToken($refresh_token) - { - Log::debug('Agave::renewToken - refresh_token = ' . 
json_encode($refresh_token)); - $api_key = config('services.agave.api_key'); - $api_secret = config('services.agave.api_token'); - - $headers = []; - $headers['Content-Type'] = 'application/x-www-form-urlencoded'; - - $auth = [$api_key, $api_secret]; - - $params = []; - $params['grant_type'] = 'refresh_token'; - $params['refresh_token'] = $refresh_token; - $params['scope'] = 'PRODUCTION'; - - try { - // Normal respsonse from Tapis is: - // {"scope":"default","token_type":"bearer", - // "expires_in":14400, - // "refresh_token":"4e6e8a38f0a33f2cff7fe0318fe314db", - // "access_token":"8485748fbaa9a36efe941d8f3c36c2a1"} - $response = $this->client->request('POST', '/token', ['auth' => $auth, 'headers' => $headers, 'form_params' => $params]); - // Convert the body of the Guzzle response JSON to a PHP object - $response_obj = json_decode($response->getBody()); - Log::debug('Agave::renewToken - refresh response = ' . json_encode($response_obj)); - // Check for Agave errors and raise an exception if we see one. - $this->raiseExceptionIfAgaveError($response_obj); - } catch (ClientException $e) { - Log::debug('Agave::renewToken - A ClientException occurred while getting a token from Agave:'); - Log::debug('Agave::renewToken - exception = ' . json_encode($e)); - - return null; - } - - Log::debug('Agave:renewToken - returning refresh response = ' . json_encode($response_obj)); - - return $response_obj; - } - - public function updateAppTemplates() - { - // Get the list of app directories. Note that this is the set of names/tags - // used for the Apps - $app_directories = config('services.agave.app_directories'); - // Build a list of Tapis App templates. - $this->appTemplates = []; - foreach ($app_directories as $app_dir) { - // Tapis Apps are stored in the resources/agave_apps directory. It is - // expected that each App that works on the iReceptor Gateway has an - // app.json file that is the Tapis definition of the App. We use this - // to determine how to submit the App to Tapis and to build the UI. - $file_path = resource_path('agave_apps/' . $app_dir . '/app.json'); - //Log::debug('updateAppTemplates: Trying to open App file ' . $file_path); - // Open the file and convert the JSON to an object. - try { - $app_json = file_get_contents($file_path); - } catch (Exception $e) { - Log::debug('updateAppTemplates: Could not open App file ' . $file_path); - Log::debug('updateAppTemplates: Error: ' . $e->getMessage()); - } - $app_config = json_decode($app_json, true); - // We want to store information about the app that is useful in helping us - // determine when to use it. This information is encoded in a JSON string in - // the App in the hidden App parameter ir_hint. - $param_count = 0; - $gateway_count = -1; - $app_info = []; - if (array_key_exists('parameters', $app_config)) { - $parameters = $app_config['parameters']; - // Loop over the parameters and check for special ir_ parameters - foreach ($parameters as $parameter) { - // ir_hints provides hints to the Gateway as to the capabilities - // of the App. - if (array_key_exists('id', $parameter) && $parameter['id'] == 'ir_hints') { - // If we found a JSON hint decode it - $hint_obj = json_decode($parameter['value']['default']); - //Log::debug('updateAppTemplates: hint_obj = ' . json_encode($hint_obj)); - // Get the object attribute - this tells us which AIRR object type this - // App can be applied to (e.g. Rearrangement, Clone, Cell). 
- $app_info['object'] = $hint_obj->object; - } elseif (array_key_exists('id', $parameter) && $parameter['id'] == 'ir_gateway_url') { - // The Tapis App uses ir_gateway_url to provide the URL of the source - // gateway that is submitting the job. This used to get assets specific - // to the given gateway. - $gateway_param = $parameter; - $gateway_param['value']['default'] = config('app.url'); - $gateway_count = $param_count; - } - $param_count = $param_count + 1; - } - } - - // Overwrite the gateway URL parameter configuration if we got one. - if ($gateway_count >= 0) { - //Log::debug('updateAppTemplates: replacing ' . json_encode($app_config['parameters'][$gateway_count])); - $app_config['parameters'][$gateway_count] = $gateway_param; - } - - // Store the object in a dictionary keyed with 'config'. We do this because - // we anticipate needing more information about the App that will be - // separate from the Tapis App. - $app_info['config'] = $app_config; - - // Save this app template keyed by the name/tag/dir - $this->appTemplates[$app_dir] = $app_info; - } - // Return the template list. - return $this->appTemplates; - } - - public function getAppTemplates($object_type) - { - // Return the list of app templates based on the AIRR object type provided. - $object_templates = []; - // For each app, filter it out based on the matching the Apps 'object' attribute - // with the value passed in. - foreach ($this->appTemplates as $app_tag => $app_info) { - if (array_key_exists('object', $app_info) && $app_info['object'] == $object_type) { - $object_templates[$app_tag] = $app_info; - } - } - - return $object_templates; - } - - public function getAppTemplate($app_tag) - { - Log::debug('getAppTemplate: looking for ' . $app_tag); - - // Return the app template for the given app tap/name. - if (array_key_exists($app_tag, $this->appTemplates)) { - return $this->appTemplates[$app_tag]; - } else { - Log::debug('getAppTemplate: could not find app ' . $app_tag); - - return null; - } - } - - public function getAppTemplateByLabel($app_label) - { - // Return the app template for the given app label. - foreach ($this->appTemplates as $app_tag => $app_info) { - // Get this template's label and if it is the same we found it. - $config = $app_info['config']; - $label = $config['label']; - if ($label == $app_label) { - return $app_info; - } - } - // Couldn't find it if we get here. - Log::debug('getAppTemplateByLabel: could not find ' . 
$app_label); - - return null; - } - - public function getJobParameters() - { - return $this->jobParameters; - } - - public function createSystem($token, $config) - { - $url = '/systems/v2/?pretty=true'; - - return $this->doPOSTRequestWithJSON($url, $token, $config); - } - - public function createApp($token, $config) - { - $url = '/apps/v2/?pretty=true'; - - return $this->doPOSTRequestWithJSON($url, $token, $config); - } - - public function createJob($token, $config) - { - $url = '/jobs/v2/?pretty=true'; - - return $this->doPOSTRequestWithJSON($url, $token, $config); - } - - public function createUser($token, $username, $first_name, $last_name, $email) - { - $password = str_random(24); - - $url = '/profiles/v2/?pretty=true'; - - $variables = [ - 'username' => $username, - 'password' => $password, - 'email' => $email, - 'first_name' => $first_name, - 'last_name' => $last_name, - ]; - - $this->doPOSTRequest($url, $token, $variables); - - return $variables; - } - - public function updateUser($token, $username, $first_name, $last_name, $email, $password = '') - { - $url = '/profiles/v2/' . $username; - - $variables = [ - 'first_name' => $first_name, - 'last_name' => $last_name, - 'email' => $email, - ]; - - if ($password != '') { - $variables['password'] = $password; - } - - $this->doPUTRequest($url, $token, $variables); - - return $variables; - } - - public function deleteUser($token, $username) - { - $url = '/profiles/v2/' . $username; - $this->doDELETERequest($url, $token); - } - - public function listApps($token) - { - $url = '/apps/v2/?pretty=true'; - - return $this->doGETRequest($url, $token); - } - - public function listSystems($token) - { - $url = '/systems/v2/?pretty=true'; - - return $this->doGETRequest($url, $token); - } - - public function getJobHistory($job_id, $token) - { - $url = '/jobs/v2/' . $job_id . '/history?pretty=true'; - - return $this->doGETRequest($url, $token, true); - } - - public function getJobStatus($job_id, $token) - { - $url = '/jobs/v2/' . $job_id . '/status?pretty=true'; - - return $this->doGETRequest($url, $token, true); - } - - public function getJob($job_id, $token) - { - $url = '/jobs/v2/' . $job_id; - - return $this->doGETRequest($url, $token, true); - } - - public function getJobOutputFile($job_id, $token, $file) - { - $url = '/jobs/v2/' . $job_id . '/outputs/media/' . $file; - - return $this->doGETRequest($url, $token, true); - } - - public function killJob($job_id, $token) - { - $url = '/jobs/v2/' . $job_id; - - // Send the kill action to the Agave job. - $config = ['action' => 'kill']; - - return $this->doPOSTRequestWithJSON($url, $token, $config); - } - - public function getExcutionSystemConfig($name, $host, $port, $username, $privateKey, $publicKey) - { - $t = [ - 'id' => $name, - 'name' => $name, - 'type' => 'EXECUTION', - 'executionType' => 'HPC', - 'scheduler' => 'SLURM', - 'queues' => [ - [ - 'default' => true, - 'name' => 'default', - 'maxRequestedTime' => strval($this->maxRunTime) . ':00:00', - 'maxNodes' => 1, - 'maxProcessorsPerNode' => $this->processorsPerNode, - 'maxMemoryPerNode' => '64GB', - 'customDirectives' => '--mem-per-cpu=' . $this->memoryPerProcessor . 
'G', - ], - ], - - 'login' => [ - 'protocol' => 'SSH', - 'host' => $host, - 'port' => $port, - 'auth' => [ - 'type' => 'SSHKEYS', - 'username' => $username, - 'publicKey' => $publicKey, - 'privateKey' => $privateKey, - ], - ], - 'storage' => [ - 'protocol' => 'SFTP', - 'host' => $host, - 'port' => $port, - 'auth' => [ - 'type' => 'SSHKEYS', - 'username' => $username, - 'publicKey' => $publicKey, - 'privateKey' => $privateKey, - ], - 'rootDir' => '/home' . '/' . $username . '/scratch', - ], - ]; - - return $t; - } - - public function getStorageSystemConfig($name, $host, $port, $username, $privateKey, $publicKey, $rootDir) - { - $t = [ - 'id' => $name, - 'name' => $name, - 'type' => 'STORAGE', - 'storage' => [ - 'protocol' => 'SFTP', - 'host' => $host, - 'port' => $port, - 'auth' => [ - 'type' => 'SSHKEYS', - 'username' => $username, - 'publicKey' => $publicKey, - 'privateKey' => $privateKey, - ], - 'rootDir' => $rootDir, - ], - ]; - - return $t; - } - - public function getAppConfig($id, $name, $executionSystem, $deploymentSystem, $deploymentPath) - { - // Update the app templates. This shouldn't be necessary every - // time, but for now we will update them every time an App - // config is requested. - $this->updateAppTemplates(); - - // Get the app template and its config given the App ID/name - $app_template = $this->getAppTemplate($id); - $app_config = $app_template['config']; - - // We overwrite the systems and deployment paths so we know what - // apps are being used from where. - $app_config['name'] = $name; - $app_config['executionSystem'] = $executionSystem; - $app_config['deploymentSystem'] = $deploymentSystem; - $app_config['deploymentPath'] = $deploymentPath; - Log::debug('Agave::getAppConfig: App config:'); - Log::debug($app_config); - - return $app_config; - } - - public function getJobConfig($name, $app_id, $storage_archiving, $notification_url, $folder, $params, $inputs, $job_params) - { - $t = [ - 'name' => $name, - 'appId' => $app_id, - 'parameters' => $params, - 'inputs' => $inputs, - 'archive' => true, - 'archiveSystem' => $storage_archiving, - 'archivePath' => $folder, - 'notifications' => [ - [ - 'url' => $notification_url . '/agave/update-status/${JOB_ID}/${JOB_STATUS}', - 'event' => '*', - 'persistent' => true, - ], - ], - ]; - - // Set up the job parameters. We loop over the possible job parameters and - // check to see if any of them are set in the job_params provided by the caller. - foreach ($this->getJobParameters() as $job_parameter_info) { - Log::debug(' getJobConfig: Processing job parameter ' . $job_parameter_info['label']); - // If the parameter is provided by the caller, process it. - if (isset($job_params[$job_parameter_info['label']])) { - Log::debug(' getJobConfig: Parameter value = ' . $job_params[$job_parameter_info['label']]); - // We need to make sure the type is correct or the JSON will fail. - // Once convereted, we set the parameter based on the label. The label - // in the config MUST be the correct Tapis label for that field. - if ($job_parameter_info['type'] == 'integer') { - $t[$job_parameter_info['label']] = intval($job_params[$job_parameter_info['label']]); - } else { - $t[$job_parameter_info['label']] = $job_params[$job_parameter_info['label']]; - } - } else { - Log::debug(' getJobConfig: default value = ' . $job_parameter_info['default']); - $t[$job_parameter_info['label']] = $job_parameter_info['default']; - } - } - - Log::debug('size of params = ' . 
count($params)); - if (count($params) == 0) { - unset($t['parameters']); - } - - return $t; - } - - public function getUsers($token) - { - $url = '/profiles/v2/?pretty=true'; - $response = $this->doGETRequest($url, $token); - - return $response->result; - } - - public function getUser($username, $token) - { - $url = '/profiles/v2/' . $username; - - return $this->doGETRequest($url, $token); - } - - public function userExists($username, $token) - { - $l = $this->getUsers($token); - foreach ($l as $u) { - if ($u->username == $username) { - return true; - } - } - - return false; - } - - public function generateUsername($first_name, $last_name, $token) - { - $first_name_stripped = str_replace(' ', '', $first_name); - $last_name_stripped = str_replace(' ', '', $last_name); - $username = strtolower($first_name_stripped) . '_' . strtolower($last_name_stripped); - $username = iconv('UTF-8', 'ASCII//TRANSLIT', $username); // remove diacritics - - // if username already exists, append number - if ($this->userExists($username, $token)) { - $i = 2; - $alternate_username = $username . $i; - while ($this->userExists($alternate_username, $token)) { - $i++; - $alternate_username = $username . $i; - } - $username = $alternate_username; - } - - return $username; - } - - public function getUserWithEmail($email, $token) - { - $user = null; - - $user_list = $this->getUsers($token); - foreach ($user_list as $u) { - if ($u->email == $email) { - $user = $u; - } - } - - return $user; - } - - public function getUserWithUsername($username, $token) - { - $user = null; - - $user_list = $this->getUsers($token); - foreach ($user_list as $u) { - if ($u->username == $username) { - $user = $u; - } - } - - return $user; - } - - public function generateSSHKeys() - { - $rsa = new RSA(); - $rsa->setPublicKeyFormat(RSA::PUBLIC_FORMAT_OPENSSH); - $keys = $rsa->createKey(); - - return ['public' => $keys['publickey'], 'private' => $keys['privatekey']]; - } - - private function doGETRequest($url, $token, $raw_json = false) - { - return $this->doHTTPRequest('GET', $url, $token, [], null, $raw_json); - } - - public function doPOSTRequest($url, $token, $variables = [], $body = null) - { - return $this->doHTTPRequest('POST', $url, $token, $variables, $body); - } - - public function doPUTRequest($url, $token, $variables = []) - { - return $this->doHTTPRequest('PUT', $url, $token, $variables); - } - - public function doDELETERequest($url, $token) - { - return $this->doHTTPRequest('DELETE', $url, $token); - } - - public function doPOSTRequestWithJSON($url, $token, $config) - { - // convert config object to json - $json = json_encode($config, JSON_PRETTY_PRINT); - Log::info('json request -> ' . $json); - - return $this->doPOSTRequest($url, $token, [], $json); - } - - private function initGuzzleRESTClient() - { - $defaults = []; - - // set tenant URL - $tenant_url = config('services.agave.tenant_url'); - $defaults['base_uri'] = $tenant_url; - - // accept self-signed SSL certificates - $defaults['verify'] = false; - - $this->client = new \GuzzleHttp\Client($defaults); - } - - private function doHTTPRequest($method, $url, $token, $variables = [], $body = null, $raw_json = false) - { - $headers = []; - $headers['Authorization'] = 'Bearer ' . $token; - Log::debug('Agave::doHTTPRequest - Bearer:' . 
$token); - - $data = []; - if ($body == null) { - $data = ['headers' => $headers, 'form_params' => $variables]; - } else { - $headers['Content-Type'] = 'application/json'; - // dd($body); - $data = ['headers' => $headers, 'body' => $body]; - } - - try { - $response = $this->client->request($method, $url, $data); - } catch (ClientException $exception) { - $response = $exception->getResponse()->getBody()->getContents(); - Log::error('Agave::doHTTPRequest:: ClientException - query = ' . $url); - Log::error('Agave::doHTTPRequest:: ClientException - response = ' . $response); - $this->raiseExceptionIfAgaveError($response); - - return $response; - } - - // return response as object - $json = $response->getBody(); - //Log::debug('json response -> ' . $json); - if ($raw_json) { - return $json; - } else { - $response = json_decode($json); - $this->raiseExceptionIfAgaveError($response); - - return $response; - } - } - - public function isAgaveError($response) - { - if ($response == null) { - return true; - } - if (property_exists($response, 'error')) { - return true; - } - if (property_exists($response, 'fault')) { - return true; - } - if (property_exists($response, 'status') && $response->status == 'error') { - return true; - } - - return false; - } - - private function raiseExceptionIfAgaveError($response) - { - if ($response == null) { - throw new \Exception('AGAVE error: response was empty'); - } - if (property_exists($response, 'error')) { - throw new \Exception('AGAVE error: ' . $response->error . ': ' . $response->error_description); - } - if (property_exists($response, 'status') && $response->status == 'error') { - throw new \Exception('AGAVE error: ' . $response->message); - } - if (property_exists($response, 'fault')) { - throw new \Exception('AGAVE error: ' . $response->fault->message); - } - } -} diff --git a/app/Http/Controllers/AdminController.php b/app/Http/Controllers/AdminController.php index ad3d90f7..0eeb9a55 100644 --- a/app/Http/Controllers/AdminController.php +++ b/app/Http/Controllers/AdminController.php @@ -2,7 +2,7 @@ namespace App\Http\Controllers; -use App\Agave; +use App\Tapis; use App\CachedSample; use App\Download; use App\FieldName; @@ -288,10 +288,10 @@ public function postEditUser(Request $request) public function getDeleteUser($username) { - // create Agave account - $agave = new Agave; - $token = $agave->getAdminToken(); - $agave->deleteUser($token, $username); + // create Tapis account + $tapis = new Tapis; + $token = $tapis->getAdminToken(); + $tapis->deleteUser($token, $username); return redirect('admin/users')->with('notification', 'User ' . $username . ' was successfully deleted.'); } diff --git a/app/Http/Controllers/CellController.php b/app/Http/Controllers/CellController.php index 90d59dad..8ae4c935 100644 --- a/app/Http/Controllers/CellController.php +++ b/app/Http/Controllers/CellController.php @@ -2,7 +2,7 @@ namespace App\Http\Controllers; -use App\Agave; +use App\Tapis; use App\Bookmark; use App\Download; use App\FieldName; @@ -229,13 +229,13 @@ public function index(Request $request) $data['filter_fields'] = $filter_fields; // Get information about all of the Apps for the AIRR "Cell" object - $agave = new Agave; - $appTemplates = $agave->getAppTemplates('Cell'); + $tapis = new Tapis; + $appTemplates = $tapis->getAppTemplates('Cell'); $app_list = []; // Store the normal job contorl parameters for the UI. The same parameters are used // by all Apps. 
-        $job_parameter_list = $agave->getJobParameters();
+        $job_parameter_list = $tapis->getJobParameters();
 
         // For each app, set up the info required by the UI for the App parameters.
         foreach ($appTemplates as $app_tag => $app_info) {
diff --git a/app/Http/Controllers/CloneController.php b/app/Http/Controllers/CloneController.php
index adc2e13c..a9d10953 100644
--- a/app/Http/Controllers/CloneController.php
+++ b/app/Http/Controllers/CloneController.php
@@ -2,7 +2,7 @@
 
 namespace App\Http\Controllers;
 
-use App\Agave;
+use App\Tapis;
 use App\Bookmark;
 use App\Download;
 use App\FieldName;
@@ -218,13 +218,13 @@ public function index(Request $request)
         $data['filter_fields'] = $filter_fields;
 
         // Get information about all of the Apps for the AIRR "Clone" object
-        $agave = new Agave;
-        $appTemplates = $agave->getAppTemplates('Clone');
+        $tapis = new Tapis;
+        $appTemplates = $tapis->getAppTemplates('Clone');
         $app_list = [];
 
         // Store the normal job control parameters for the UI. The same parameters are used
         // by all Apps.
-        $job_parameter_list = $agave->getJobParameters();
+        $job_parameter_list = $tapis->getJobParameters();
 
         // For each app, set up the info required by the UI for the App parameters.
         foreach ($appTemplates as $app_tag => $app_info) {
diff --git a/app/Http/Controllers/JobController.php b/app/Http/Controllers/JobController.php
index ee7f5b8b..cf75b099 100644
--- a/app/Http/Controllers/JobController.php
+++ b/app/Http/Controllers/JobController.php
@@ -2,9 +2,9 @@
 
 namespace App\Http\Controllers;
 
-use App\Agave;
+use App\Tapis;
 use App\Job;
-use App\Jobs\LaunchAgaveJob;
+use App\Jobs\LaunchJob;
 use App\Jobs\PrepareDataForThirdPartyAnalysis;
 use App\JobStep;
 use App\LocalJob;
@@ -45,28 +45,28 @@ public function getJobData($job_id)
         // essentially map to elements in the HTML and are updated through
         // the JS code in main.js - in the Jobs section.
         $data['status'] = $job->status;
-        $data['agave_status'] = $job->agave_status;
+        $data['agave_status'] = $job->getJobStatus();
         $data['submission_date_relative'] = $job->createdAtRelative();
         $data['run_time'] = $job->totalTime();
         $data['job_url'] = $job->url;
         $data['job'] = $job;
 
-        // Create an Agave object to work with. This is constant across all jobs.
-        $agave = new Agave;
+        // Create a Tapis object to work with. This is constant across all jobs.
+        $tapis = new Tapis;
 
         // Build the job_summary block HTML
         $data['job_summary'] = [];
         $s = '<h4>App Parameters</h4>';
         $s .= '<p>';
 
         // Get the JSON from the Job, we need info from it.
         $param_count = 0;
-        $agave_json = $this->getAgaveJobJSON($job->id, $agave);
+        $job_json = $this->getJobJSON($job->id, $tapis);
 
         // If we have a JSON string for the Job, process the App parameters.
-        if ($agave_json != null) {
+        if ($job_json != null) {
             // Get the App template for this App. The job stores the App Label so
             // we use the label to look it up. This returns a JSON object in the form
             // of an Agave App template.
-            $app_template = $agave->getAppTemplateByLabel($job->app);
+            $app_template = $tapis->getAppTemplateByLabel($job->app);
             // Get the parameters from this app template. We need to map parameter tags
             // to human readable names if possible.
             $template_parameters = [];
             if (array_key_exists('parameters', $config)) {
                 $template_parameters = $config['parameters'];
             }
 
-            // Get the Agave job status and from it get the parameters.
-            $agave_status = json_decode($agave_json);
-            $app_parameters = $agave_status->result->parameters;
+            // Get the Tapis job status and from it get the parameters.
+            $job_status = json_decode($job_json);
+            $app_parameters = $job_status->result->parameters;
             // For each parameter, add some text to the display string.
             foreach ($app_parameters as $param => $value) {
                 // Basic parameters are key values with strings. Special
@@ -104,9 +104,9 @@ public function getJobData($job_id)
         $s .= '<h4>Job Parameters</h4>';
         $s .= '<p>';
-        $s .= 'Number of cores: ' . strval($agave->processorsPerNode()) . '<br/>\n';
-        $s .= 'Maximum memory per core: ' . strval($agave->memoryPerProcessor()) . ' GB<br/>\n';
-        $s .= 'Maximum run time: ' . strval($agave->maxRunTime()) . ' hours<br/>\n';
+        $s .= 'Number of cores: ' . strval($tapis->processorsPerNode()) . '<br/>\n';
+        $s .= 'Maximum memory per core: ' . strval($tapis->memoryPerProcessor()) . ' GB<br/>\n';
+        $s .= 'Maximum run time: ' . strval($tapis->maxRunTime()) . ' hours<br/>\n';
         $s .= '</p>';
 
         $data['job_summary'] = explode('\n', $s);
@@ -115,7 +115,7 @@ public function getJobData($job_id)
 
         $data['job_control_button'] = [];
         $s = '';
         $s .= '';
-        if ($job->agave_id == '') {
+        if ($job->getJobID() == '') {
            $s .= '