From daf2e009c5e09f4069a118f0727a2cc89d5afc99 Mon Sep 17 00:00:00 2001 From: Pablo Costa Date: Thu, 2 Nov 2023 17:15:42 -0300 Subject: [PATCH 01/21] Remove unused `restore.sh` script --- bin/restore.sh | 64 -------------------------------------------------- 1 file changed, 64 deletions(-) delete mode 100755 bin/restore.sh diff --git a/bin/restore.sh b/bin/restore.sh deleted file mode 100755 index 98d1190..0000000 --- a/bin/restore.sh +++ /dev/null @@ -1,64 +0,0 @@ -#!/bin/bash -e - -# settings -MONGODB_HOST=${MONGODB_HOST:-mongo} -TARGET_FILE=${TARGET_FILE} -MONGORESTORE_OPTS=${MONGORESTORE_OPTS:-} - -# start script -CWD=`/usr/bin/dirname $0` -cd $CWD -. ./functions.sh - -echo "=== $0 started at `/bin/date "+%Y/%m/%d %H:%M:%S"` ===" - -TMPDIR="/tmp" -TARGET_DIRNAME="mongodump" -TARGET="${TMPDIR}/${TARGET_DIRNAME}" -TAR_CMD="/bin/tar" -TAR_OPTS="jxvf" - -DIRNAME=`/usr/bin/dirname ${TARGET}` -BASENAME=`/usr/bin/basename ${TARGET}` -TARBALL_FULLPATH="${TMPDIR}/${TARGET_FILE}" -TARBALL_FULLURL=${TARGET_BUCKET_URL}${TARGET_FILE} - -# check parameters -if [ "x${TARGET_BUCKET_URL}" == "x" ]; then - echo "ERROR: The environment variable TARGET_BUCKET_URL must be specified." 1>&2 - exit 1 -fi -if [ "x${TARGET_FILE}" == "x" ]; then - echo "ERROR: The environment variable TARGET_FILE must be specified." 1>&2 - exit 1 -fi - -if [ `echo $TARGET_BUCKET_URL | cut -f1 -d":"` == "s3" ]; then - # download tarball from Amazon S3 - s3_copy_file ${TARBALL_FULLURL} ${TARBALL_FULLPATH} -elif [ `echo $TARGET_BUCKET_URL | cut -f1 -d":"` == "gs" ]; then - gs_copy_file ${TARBALL_FULLURL} ${TARBALL_FULLPATH} -fi - -# run tar command -echo "expands ${TARGET}..." -time ${TAR_CMD} ${TAR_OPTS} ${TARBALL_FULLPATH} -C ${DIRNAME} ${BASENAME} - -# restore database -if [ "x${MONGODB_DBNAME}" != "x" ]; then - MONGORESTORE_OPTS="--nsInclude=${MONGODB_DBNAME}.* ${MONGORESTORE_OPTS}" -fi - -if [ "x${MONGODB_URI}" != "x" ]; then - MONGORESTORE_OPTS="--uri=${MONGODB_URI} ${MONGORESTORE_OPTS}" -else - if [ "x${MONGODB_USERNAME}" != "x" ]; then - MONGORESTORE_OPTS="${MONGORESTORE_OPTS} -u ${MONGODB_USERNAME} -p ${MONGODB_PASSWORD}" - fi - if [ "x${MONGODB_AUTHDB}" != "x" ]; then - MONGORESTORE_OPTS="${MONGORESTORE_OPTS} --authenticationDatabase ${MONGODB_AUTHDB}" - fi - MONGORESTORE_OPTS="-h ${MONGODB_HOST} ${MONGORESTORE_OPTS}" -fi -echo "restore MongoDB..." -mongorestore -v ${TARGET} ${MONGORESTORE_OPTS} From 5be23c23bce4b3dd0cd8b82df1960c5cdd030949 Mon Sep 17 00:00:00 2001 From: Pablo Costa Date: Thu, 2 Nov 2023 17:42:24 -0300 Subject: [PATCH 02/21] Update run_backup.yml --- .github/workflows/run_backup.yml | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/.github/workflows/run_backup.yml b/.github/workflows/run_backup.yml index 1509c24..beea482 100644 --- a/.github/workflows/run_backup.yml +++ b/.github/workflows/run_backup.yml @@ -17,14 +17,26 @@ on: env: AWS_REGION: us-east-1 # set this to your preferred AWS region, e.g. us-west-1 - ECR_REPOSITORY: mongodb-awesome-backup # set this to your Amazon ECR repository name defaults: run: shell: bash jobs: - backup-to-s3: + build: + name: Build + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v2 + + - name: Build Docker + id: build-docker-image + run: | + docker build -t aiid-docker-backup:latest . 
+ + backup-to-buckets: name: Backup runs-on: ubuntu-latest @@ -39,9 +51,6 @@ jobs: - name: Generate public and private backups id: build-image run: | - ls - pwd - ls -a docker run --rm \ -e AWS_ACCESS_KEY_ID="${{ secrets.AWS_S3_BACKUP_USER_ACCESS_KEY_ID }}" \ -e AWS_SECRET_ACCESS_KEY="${{ secrets.AWS_S3_BACKUP_USER_SECRET_ACCESS_KEY }}" \ @@ -63,5 +72,5 @@ jobs: -e MONGODB_AUTHDB="admin" \ -e AWSCLI_ENDPOINT_OPT="" \ -v ~:/mab \ - public.ecr.aws/${{ vars.AWS_ECR_ID || 'd4w2c1x5'}}/mongodb-awesome-backup:latest + aiid-docker-backup:latest From 13955793b38c20d48077d97e69d0a1052c55253f Mon Sep 17 00:00:00 2001 From: Pablo Costa Date: Thu, 2 Nov 2023 17:47:30 -0300 Subject: [PATCH 03/21] Update run_backup.yml --- .github/workflows/run_backup.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/run_backup.yml b/.github/workflows/run_backup.yml index beea482..b0be259 100644 --- a/.github/workflows/run_backup.yml +++ b/.github/workflows/run_backup.yml @@ -38,6 +38,7 @@ jobs: backup-to-buckets: name: Backup + needs: build runs-on: ubuntu-latest steps: From bf5c7c10a1bc0b3fd95e0d4433a569ca484d6c1b Mon Sep 17 00:00:00 2001 From: Pablo Costa Date: Thu, 2 Nov 2023 17:52:53 -0300 Subject: [PATCH 04/21] Update run_backup.yml --- .github/workflows/run_backup.yml | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/.github/workflows/run_backup.yml b/.github/workflows/run_backup.yml index b0be259..31dc8a2 100644 --- a/.github/workflows/run_backup.yml +++ b/.github/workflows/run_backup.yml @@ -31,10 +31,16 @@ jobs: - name: Checkout uses: actions/checkout@v2 - - name: Build Docker - id: build-docker-image - run: | - docker build -t aiid-docker-backup:latest . + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 + + - name: Build Docker image + uses: docker/build-push-action@v2 + with: + context: . + file: ./Dockerfile + load: true + tags: aiid-docker-backup:latest backup-to-buckets: name: Backup From d3f291d1d4215ca476d4b57b7f9d3e7545d30646 Mon Sep 17 00:00:00 2001 From: Pablo Costa Date: Thu, 2 Nov 2023 18:03:45 -0300 Subject: [PATCH 05/21] Update run_backup.yml --- .github/workflows/run_backup.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/run_backup.yml b/.github/workflows/run_backup.yml index 31dc8a2..9282c62 100644 --- a/.github/workflows/run_backup.yml +++ b/.github/workflows/run_backup.yml @@ -32,10 +32,10 @@ jobs: uses: actions/checkout@v2 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 + uses: docker/setup-buildx-action@v3 - name: Build Docker image - uses: docker/build-push-action@v2 + uses: docker/build-push-action@v5 with: context: . 
          file: ./Dockerfile
@@ -56,7 +56,7 @@ jobs:
           aws-region: ${{ env.AWS_REGION }}
 
       - name: Generate public and private backups
-        id: build-image
+        id: run-backups
         run: |
           docker run --rm \
From 130e4c4e7bff7584ea9c697b51e116900493a32a Mon Sep 17 00:00:00 2001
From: Pablo Costa
Date: Thu, 2 Nov 2023 18:07:01 -0300
Subject: [PATCH 06/21] Update run_backup.yml

---
 .github/workflows/run_backup.yml | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/run_backup.yml b/.github/workflows/run_backup.yml
index 9282c62..0993d66 100644
--- a/.github/workflows/run_backup.yml
+++ b/.github/workflows/run_backup.yml
@@ -23,8 +23,8 @@ defaults:
     shell: bash
 
 jobs:
-  build:
-    name: Build
+  build-and-run-backups:
+    name: Backup
     runs-on: ubuntu-latest
 
     steps:
@@ -42,12 +42,6 @@ jobs:
           load: true
           tags: aiid-docker-backup:latest
 
-  backup-to-buckets:
-    name: Backup
-    needs: build
-    runs-on: ubuntu-latest
-
-    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_S3_BACKUP_USER_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_S3_BACKUP_USER_SECRET_ACCESS_KEY }}
          aws-region: ${{ env.AWS_REGION }}
 
      - name: Generate public and private backups
-       id: run-backups
        run: |
          docker run --rm \
From 5e54160d7363ea19b504d29ff09f466ae3155cee Mon Sep 17 00:00:00 2001
From: kepae
Date: Thu, 2 Nov 2023 16:24:29 -0400
Subject: [PATCH 07/21] simplify file uploading arguments and function

---
 bin/cloudflare_s3_operations.py | 32 +++++++++++++++++++-------------
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/bin/cloudflare_s3_operations.py b/bin/cloudflare_s3_operations.py
index 59b64eb..5d69608 100644
--- a/bin/cloudflare_s3_operations.py
+++ b/bin/cloudflare_s3_operations.py
@@ -7,7 +7,9 @@
 
 
 def parse_arguments():
-    parser = argparse.ArgumentParser(description="TODO")
+    parser = argparse.ArgumentParser(
+        description="Simple client for uploading, deleting, listing, and checking objects in Cloudflare R2 buckets."
+    )
 
     parser.add_argument(
         "--operation",
 
     # Arguments that are always required.
     parser.add_argument("--account_id", required=True, help="Cloudflare account ID")
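+    # R2 exposes an S3-compatible API, so the credentials below are standard
+    # S3-style access key pairs.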
- #docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG - #echo "image=$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG" >> $GITHUB_ENV From efd47ec435a0f04eaf1fe508ccff695f29feb0e0 Mon Sep 17 00:00:00 2001 From: Pablo Costa Date: Thu, 2 Nov 2023 18:55:09 -0300 Subject: [PATCH 09/21] Update run_backup.yml --- .github/workflows/run_backup.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/run_backup.yml b/.github/workflows/run_backup.yml index 0993d66..3ba2b78 100644 --- a/.github/workflows/run_backup.yml +++ b/.github/workflows/run_backup.yml @@ -70,7 +70,6 @@ jobs: -e MONGODB_USERNAME="${{ secrets.MONGO_BACKUP_USER }}" \ -e MONGODB_PASSWORD="${{ secrets.MONGO_BACKUP_USER_PASSWORD }}" \ -e MONGODB_AUTHDB="admin" \ - -e AWSCLI_ENDPOINT_OPT="" \ -v ~:/mab \ aiid-docker-backup:latest From da9c0737028e06fb521024327f43add159eb33b2 Mon Sep 17 00:00:00 2001 From: Pablo Costa Date: Thu, 2 Nov 2023 18:42:58 -0300 Subject: [PATCH 10/21] Remove unnecessary variables from README --- README.md | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 040c957..bf4c6eb 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ This is a quick port of the forked project to support JSON and CSV backups of the [AIID](https://incidentdatabase.ai/). -The complete state of the database will be backed up on a weekly basis in both JSON and CSV form. The backups can be downloaded from here: todo +The complete state of the database will be backed up on a weekly basis in both JSON and CSV form. The backups can be downloaded from [here](https://incidentdatabase.ai/research/snapshots/). What is mongodb-awesome-backup? ------------------------------- @@ -13,11 +13,11 @@ Requirements ------------ Amazon IAM Access Key ID/Secret Access Key, which must have the access rights of the target Amazon S3 bucket. +Cloudflare R2 Access Key ID/Secret Access Key, which must have the access rights of the target Cloudflare R2 bucket. +MongoDB credentials with read access to the target database. Usage ----- -Note that either AWS_ or GCP_ vars are required not both. - ```bash docker run --rm \ -e AWS_ACCESS_KEY_ID= \ @@ -36,7 +36,6 @@ docker run --rm \ [ -e MONGODB_USERNAME= \ ] [ -e MONGODB_PASSWORD= \ ] [ -e MONGODB_AUTHDB= \ ] - [ -e AWSCLI_ENDPOINT_OPT= \ ] [ -v ~:/mab \ ] weseek/mongodb-awesome-backup ``` @@ -67,10 +66,6 @@ Environment variables | Variable | Description | Default | | --------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | -| GCP_SERVICE_ACCOUNT_KEY_JSON_PATH | JSON file path to your GCP Service Account Key | - | -| GCP_ACCESS_KEY_ID | Your GCP Access Key | - | -| GCP_SECRET_ACCESS_KEY | Your GCP Secret | - | -| GCP_PROJECT_ID | Your GCP Project ID | - | | BACKUPFILE_PREFIX | Prefix of Backup Filename | "backup" | | MONGODB_URI | Target MongoDB URI (ex. `mongodb://mongodb?replicaSet=rs0`). If set, the other `MONGODB_*` variables will be ignored. | - | | MONGODB_HOST | Target MongoDB Host | "mongo" | @@ -80,7 +75,3 @@ Environment variables | MONGODB_AUTHDB | Authentication DB name | - | | CRONMODE | If set "true", this container is executed in cron mode. In cron mode, the script will be executed with the specified arguments and at the time specified by CRON_EXPRESSION. | "false" | | CRON_EXPRESSION | Cron expression (ex. 
"CRON_EXPRESSION=0 4 * * *" if you want to run at 4:00 every day) | - | -| AWSCLI_ENDPOINT_OPT | Set a custom S3 endpoint if you use a S3 based service like DigitalOcean Spaces. (ex. AWSCLI_ENDPOINT_OPT="https://fra1.digitaloceanspaces.com") If not set the Amazon S3 standard endpoint will be used. | - | -| AWSCLIOPT | Other options you want to pass to `aws` command | - | -| GCSCLIOPT | Other options you want to pass to `gsutil` command | - | -| HEALTHCHECKS_URL | URL that gets called after a successful backup (eg. https://healthchecks.io) | - | From a570ef404f3afd29c2bae3866c782c5acf6e6e6f Mon Sep 17 00:00:00 2001 From: Pablo Costa Date: Thu, 2 Nov 2023 18:51:23 -0300 Subject: [PATCH 11/21] Remove all references to GS --- README.md | 8 +++---- bin/backup_filtered_data.sh | 9 -------- bin/backup_full_snapshot.sh | 10 --------- bin/functions.sh | 42 ++++--------------------------------- bin/list.sh | 3 --- bin/prune.sh | 2 -- 6 files changed, 8 insertions(+), 66 deletions(-) diff --git a/README.md b/README.md index bf4c6eb..bb8777c 100644 --- a/README.md +++ b/README.md @@ -22,8 +22,8 @@ Usage docker run --rm \ -e AWS_ACCESS_KEY_ID= \ -e AWS_SECRET_ACCESS_KEY= \ - -e TARGET_PRIVATE_BUCKET_URL= \ - -e TARGET_PUBLIC_BUCKET_URL= \ + -e TARGET_PRIVATE_BUCKET_URL= \ + -e TARGET_PUBLIC_BUCKET_URL= \ -e CLOUDFLARE_ACCOUNT_ID= \ -e CLOUDFLARE_R2_ACCESS_KEY= \ -e CLOUDFLARE_R2_SECRET_KEY= \ @@ -54,8 +54,8 @@ Environment variables | --------------------- | ------------------------------------------------------------------------------ | ------- | | AWS_ACCESS_KEY_ID | Your IAM Access Key ID | - | | AWS_SECRET_ACCESS_KEY | Your IAM Secret Access Key | - | -| TARGET_PRIVATE_BUCKET_URL | Target private Bucket URL ([s3://...\|gs://...]). **URL is needed to be end with '/'** | - | -| TARGET_PUBLIC_BUCKET_URL | Target public Bucket URL ([s3://...\|gs://...]). **URL is needed to be end with '/'** | - | +| TARGET_PRIVATE_BUCKET_URL | Target private Bucket URL (s3://...). **URL is needed to be end with '/'** | - | +| TARGET_PUBLIC_BUCKET_URL | Target public Bucket URL (s3://...). **URL is needed to be end with '/'** | - | | CLOUDFLARE_ACCOUNT_ID | Cloudflare R2 account ID | - | | CLOUDFLARE_R2_ACCESS_KEY | Cloudflare R2 Access Key ID | - | | CLOUDFLARE_R2_SECRET_KEY | Cloudflare R2 Access Secret ID | - | diff --git a/bin/backup_filtered_data.sh b/bin/backup_filtered_data.sh index 80a597c..640d55c 100755 --- a/bin/backup_filtered_data.sh +++ b/bin/backup_filtered_data.sh @@ -6,13 +6,6 @@ echo "Starting backup_csv_data.sh script execution..." BACKUPFILE_PREFIX=${BACKUPFILE_PREFIX:-backup} MONGODB_HOST=${MONGODB_HOST:-mongo} CRONMODE=${CRONMODE:-false} -#MONGODB_URI= -#MONGODB_HOST= -#MONGODB_DBNAME= -#MONGODB_USERNAME= -#MONGODB_PASSWORD= -#MONGODB_AUTHDB= -#TARGET_BUCKET_URL=[s3://... | gs://...] 
(must be ended with /) TARGET_BUCKET_URL=${TARGET_PUBLIC_BUCKET_URL} CLOUDFLARE_ACCOUNT_ID=${CLOUDFLARE_ACCOUNT_ID} @@ -118,8 +111,6 @@ if [ "x${CLOUDFLARE_ACCOUNT_ID}" != "x" ]; then elif [ `echo $TARGET_BUCKET_URL | cut -f1 -d":"` == "s3" ]; then # transfer tarball to Amazon S3 s3_copy_file ${TARBALL_FULLPATH} ${TARGET_BUCKET_URL} -elif [ `echo $TARGET_BUCKET_URL | cut -f1 -d":"` == "gs" ]; then - gs_copy_file ${TARBALL_FULLPATH} ${TARGET_BUCKET_URL} fi # call healthchecks url for successful backup diff --git a/bin/backup_full_snapshot.sh b/bin/backup_full_snapshot.sh index 02b4f7a..5e68e24 100755 --- a/bin/backup_full_snapshot.sh +++ b/bin/backup_full_snapshot.sh @@ -6,14 +6,6 @@ echo "Starting backup_snapshot.sh script execution..." BACKUPFILE_PREFIX=${BACKUPFILE_PREFIX:-backup} MONGODB_HOST=${MONGODB_HOST:-mongo} CRONMODE=${CRONMODE:-false} -#MONGODB_URI= -#MONGODB_HOST= -#MONGODB_DBNAME= -#MONGODB_USERNAME= -#MONGODB_PASSWORD= -#MONGODB_AUTHDB= -#MONGODUMP_OPTS= -#TARGET_BUCKET_URL=[s3://... | gs://...] (must be ended with /) TARGET_BUCKET_URL=${TARGET_PRIVATE_BUCKET_URL} CLOUDFLARE_ACCOUNT_ID=${CLOUDFLARE_ACCOUNT_ID} @@ -89,8 +81,6 @@ if [ "x${CLOUDFLARE_ACCOUNT_ID}" != "x" ]; then elif [ `echo $TARGET_BUCKET_URL | cut -f1 -d":"` == "s3" ]; then # transfer tarball to Amazon S3 s3_copy_file ${TARBALL_FULLPATH} ${TARGET_BUCKET_URL} -elif [ `echo $TARGET_BUCKET_URL | cut -f1 -d":"` == "gs" ]; then - gs_copy_file ${TARBALL_FULLPATH} ${TARGET_BUCKET_URL} fi # call healthchecks url for successful backup diff --git a/bin/functions.sh b/bin/functions.sh index 303ec51..be5ae90 100755 --- a/bin/functions.sh +++ b/bin/functions.sh @@ -4,14 +4,6 @@ AWSCLI_COPY_OPT="s3 cp" AWSCLI_LIST_OPT="s3 ls" AWSCLI_DEL_OPT="s3 rm" AWSCLIOPT=${AWSCLIOPT:-} -#AWSCLI_ENDPOINT_OPT=${AWSCLI_ENDPOINT_OPT:+"--endpoint-url ${AWSCLI_ENDPOINT_OPT}"} -AWSCLI_ENDPOINT_OPT="" - -GCSCLI="/root/google-cloud-sdk/bin/gsutil" -GCSCLI_COPY_OPT="cp" -GCSCLI_LIST_OPT="ls" -GCSCLI_DEL_OPT="rm" -GCSCLIOPT=${GCSCLIOPT:-} CLOUDFLARE_S3_CLIENT_SCRIPT="./cloudflare_s3_operations.py" CLOUDFLARE_UPLOAD_SCRIPT="./cloudflare_python/cloudflare_upload_file.py" @@ -27,11 +19,7 @@ DATE_CMD="/bin/date" # arguments: 1. s3 url (s3://.../...) s3_exists() { if [ $# -ne 1 ]; then return 255; fi - ${AWSCLI} ${AWSCLI_ENDPOINT_OPT} ${AWSCLIOPT} ${AWSCLI_LIST_OPT} $1 >/dev/null -} -gs_exists() { - if [ $# -ne 1 ]; then return 255; fi - ${GCSCLI} ${GCSCLIOPT} ${GCSCLI_LIST_OPT} $1 >/dev/null + ${AWSCLI} ${AWSCLIOPT} ${AWSCLI_LIST_OPT} $1 >/dev/null } # Check the existence of specified file on Cloudflare R2 bucket. # arguments: 1. CLOUDFLARE_ACCOUNT_ID @@ -48,10 +36,7 @@ r2_exists() { # Output the list of the files on specified S3 URL. # arguments: 1. s3 url (s3://...) s3_list_files() { - ${AWSCLI} ${AWSCLI_ENDPOINT_OPT} ${AWSCLIOPT} ${AWSCLI_LIST_OPT} $1 -} -gs_list_files() { - ${GCSCLI} ${GCSCLIOPT} ${GCSCLI_LIST_OPT} $1 + ${AWSCLI} ${AWSCLIOPT} ${AWSCLI_LIST_OPT} $1 } # Output the list of the files on specified Cloudflare R2. # arguments: 1. CLOUDFLARE_ACCOUNT_ID @@ -68,11 +53,7 @@ r2_list_files() { # arguments: 1. s3 url (s3://.../...) s3_delete_file() { if [ $# -ne 1 ]; then return 255; fi - ${AWSCLI} ${AWSCLI_ENDPOINT_OPT} ${AWSCLIOPT} ${AWSCLI_DEL_OPT} $1 -} -gs_delete_file() { - if [ $# -ne 1 ]; then return 255; fi - ${GCSCLI} ${GCSCLIOPT} ${GCSCLI_DEL_OPT} $1 + ${AWSCLI} ${AWSCLIOPT} ${AWSCLI_DEL_OPT} $1 } # Delete the specified file on Cloudflare R2 bucket. # arguments: 1. CLOUDFLARE_ACCOUNT_ID @@ -96,14 +77,10 @@ r2_delete_file() { # 1. 
source s3 url (s3://...) # 2. target s3 url (s3://...) s3_copy_file() { - echo ${AWSCLI} ${AWSCLI_ENDPOINT_OPT} ${AWSCLIOPT} ${AWSCLI_COPY_OPT} $1 $2 + echo ${AWSCLI} ${AWSCLIOPT} ${AWSCLI_COPY_OPT} $1 $2 if [ $# -ne 2 ]; then return 255; fi ${AWSCLI} ${AWSCLI_ENDPOINT_OPT} ${AWSCLIOPT} ${AWSCLI_COPY_OPT} $1 $2 } -gs_copy_file() { - if [ $# -ne 2 ]; then return 255; fi - ${GCSCLI} ${GCSCLIOPT} ${GCSCLI_COPY_OPT} $1 $2 -} # Copy the specified file to Cloudflare R2. # arguments: 1. CLOUDFLARE_ACCOUNT_ID # 2. CLOUDFLARE_R2_ACCESS_KEY @@ -156,17 +133,6 @@ s3_delete_file_if_delete_backup_day() { fi fi } -gs_delete_file_if_delete_backup_day() { - if [ $# -ne 3 ]; then return 255; fi - if check_is_delete_backup_day $2 $3; then - if gs_exists $1; then - gs_delete_file $1 - echo "DELETED past backuped file on GS: $1" - else - echo "not found past backuped file on GS: $1" - fi - fi -} # arguments: 1. CLOUDFLARE_ACCOUNT_ID # 2. CLOUDFLARE_R2_ACCESS_KEY # 3. CLOUDFLARE_R2_SECRET_KEY diff --git a/bin/list.sh b/bin/list.sh index e3d1883..44d6b47 100755 --- a/bin/list.sh +++ b/bin/list.sh @@ -14,9 +14,6 @@ do if [ `echo $TARGET_BUCKET_URL | cut -f1 -d":"` == "s3" ]; then echo "There are files below in '${TARGET_BUCKET_URL}' S3 bucket:" s3_list_files ${TARGET_BUCKET_URL} - elif [ `echo $TARGET_BUCKET_URL | cut -f1 -d":"` == "gs" ]; then - echo "There are files below in '${TARGET_BUCKET_URL}' GS bucket:" - gs_list_files ${TARGET_BUCKET_URL} fi fi done diff --git a/bin/prune.sh b/bin/prune.sh index a9d12c9..4eee35c 100755 --- a/bin/prune.sh +++ b/bin/prune.sh @@ -32,7 +32,5 @@ do echo "pruning ${TARGET_BUCKET_URL}" if [ `echo $TARGET_BUCKET_URL | cut -f1 -d":"` == "s3" ]; then s3_delete_file_if_delete_backup_day ${TARGET_BUCKET_URL}${TARBALL_PAST} ${DELETE_TARGET_DAYS_LEFT} ${DELETE_DEVIDE} - elif [ `echo $TARGET_BUCKET_URL | cut -f1 -d":"` == "gs" ]; then - gs_delete_file_if_delete_backup_day ${TARGET_BUCKET_URL}${TARBALL_PAST} ${DELETE_TARGET_DAYS_LEFT} ${DELETE_DEVIDE} fi done \ No newline at end of file From e9cf7a17d9ce842f77c9f16dbc29c135b2a7d735 Mon Sep 17 00:00:00 2001 From: Pablo Costa Date: Thu, 2 Nov 2023 19:54:26 -0300 Subject: [PATCH 12/21] Merge both public and private backups in one file --- Dockerfile | 2 +- README.md | 2 +- bin/backup.sh | 99 ++++++++++++++++++++++++++++++++++++++++++++++++ bin/functions.sh | 10 ++--- bin/list.sh | 28 +++++++------- bin/prune.sh | 24 ++++++------ 6 files changed, 130 insertions(+), 35 deletions(-) create mode 100755 bin/backup.sh diff --git a/Dockerfile b/Dockerfile index 08cc64e..2200c92 100644 --- a/Dockerfile +++ b/Dockerfile @@ -22,4 +22,4 @@ ENV AWS_DEFAULT_REGION=ap-northeast-1 COPY bin /opt/bin WORKDIR /opt/bin ENTRYPOINT ["/opt/bin/entrypoint.sh"] -CMD ["backup_full_snapshot", "backup_filtered_data", "prune", "list"] +CMD ["backup", "prune", "list"] diff --git a/README.md b/README.md index bb8777c..fe26ef6 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ docker run --rm \ -e CLOUDFLARE_R2_ACCESS_KEY= \ -e CLOUDFLARE_R2_SECRET_KEY= \ -e CLOUDFLARE_R2_PUBLIC_BUCKET= \ - -e CLOUDFLARE_R2_PRIVATE_BUCKET= \ + -e CLOUDFLARE_R2_BUCKET= \ [ -e BACKUPFILE_PREFIX= \ ] [ -e MONGODB_HOST= \ ] diff --git a/bin/backup.sh b/bin/backup.sh new file mode 100755 index 0000000..1c90b30 --- /dev/null +++ b/bin/backup.sh @@ -0,0 +1,99 @@ +#!/bin/bash -e + +echo "Starting backup.sh script execution..." 
+ +# settings +IS_PUBLIC_BACKUP=${IS_PUBLIC_BACKUP:-false} +BACKUPFILE_PREFIX=${BACKUPFILE_PREFIX:-backup} +MONGODB_HOST=${MONGODB_HOST:-mongo} +CRONMODE=${CRONMODE:-false} +CLOUDFLARE_ACCOUNT_ID=${CLOUDFLARE_ACCOUNT_ID} +if [ "${IS_PUBLIC_BACKUP}" == "true" ]; then + TARGET_BUCKET_URL=${TARGET_PUBLIC_BUCKET_URL} +else + TARGET_BUCKET_URL=${TARGET_PRIVATE_BUCKET_URL} +fi + +# start script +CWD=`/usr/bin/dirname $0` +cd $CWD + +. ./functions.sh +NOW=`create_current_yyyymmddhhmmss` + +echo "=== $0 started at `/bin/date "+%Y/%m/%d %H:%M:%S"` ===" + +TMPDIR="/tmp" +TARGET_DIRNAME="mongodump_full_snapshot" +TARGET="${TMPDIR}/${TARGET_DIRNAME}" +TAR_CMD="/bin/tar" +TAR_OPTS="jcvf" + +DIRNAME=`/usr/bin/dirname ${TARGET}` +BASENAME=`/usr/bin/basename ${TARGET}` +TARBALL="${BACKUPFILE_PREFIX}-${NOW}.tar.bz2" +TARBALL_FULLPATH="${TMPDIR}/${TARBALL}" + +# check parameters +# deprecate the old option +if [ "x${TARGET_BUCKET_URL}${CLOUDFLARE_ACCOUNT_ID}" == "x" ]; then + echo "ERROR: At least one of the environment variables TARGET_BUCKET_URL or CLOUDFLARE_ACCOUNT_ID must be specified." 1>&2 + exit 1 +fi +if [ "x${CLOUDFLARE_ACCOUNT_ID}" != "x" ]; then + if [ -z "${CLOUDFLARE_R2_ACCESS_KEY}" ]; then + echo "ERROR: If CLOUDFLARE_ACCOUNT_ID environment variable is defined, you have to define the CLOUDFLARE_R2_ACCESS_KEY as well" 1>&2 + exit 1 + fi + if [ -z "${CLOUDFLARE_R2_SECRET_KEY}" ]; then + echo "ERROR: If CLOUDFLARE_ACCOUNT_ID environment variable is defined, you have to define the CLOUDFLARE_R2_SECRET_KEY as well" 1>&2 + exit 1 + fi + if [ "${IS_PUBLIC_BACKUP}" == "true" ] && [ -z "${CLOUDFLARE_R2_BUCKET}" ]; then + echo "ERROR: If CLOUDFLARE_ACCOUNT_ID environment variable is defined, you have to define the CLOUDFLARE_R2_PUBLIC_BUCKET as well" 1>&2 + exit 1 + fi + if [ "${IS_PUBLIC_BACKUP}" == "false" ] && [ -z "${CLOUDFLARE_R2_BUCKET}" ]; then + echo "ERROR: If CLOUDFLARE_ACCOUNT_ID environment variable is defined, you have to define the CLOUDFLARE_R2_PRIVATE_BUCKET as well" 1>&2 + exit 1 + fi +fi + +# dump databases +MONGODUMP_OPTS="--uri=${MONGODB_URI} ${MONGODUMP_OPTS}" +echo "dump MongoDB aiidprod to the local filesystem..." +mongodump -o ${TARGET} ${MONGODUMP_OPTS} + +# Dump Translations database +MONGODUMP_OPTS_TRANSLATIONS="--uri=${MONGODB_URI_TRANSLATIONS}" +echo "dump MongoDB translations to the local filesystem..." +mongodump -o ${TARGET} ${MONGODUMP_OPTS_TRANSLATIONS} + +echo "Report contents are subject to their own intellectual property rights. Unless otherwise noted, the database is shared under (CC BY-SA 4.0). See: https://creativecommons.org/licenses/by-sa/4.0/" > ${TARGET}/license.txt + +ls -lah +echo "---" +ls -lah ${TARGET} + +# run tar command +echo "Start backup ${TARGET} into ${TARGET_BUCKET_URL} ..." 
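+# Single entry point for both backup flavors: IS_PUBLIC_BACKUP selects the
+# public or private target bucket configured below.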
+time ${TAR_CMD} ${TAR_OPTS} ${TARBALL_FULLPATH} -C ${DIRNAME} ${BASENAME} + +if [ "x${CLOUDFLARE_ACCOUNT_ID}" != "x" ]; then + # upload tarball to Cloudflare R2 + r2_copy_file ${CLOUDFLARE_ACCOUNT_ID} ${CLOUDFLARE_R2_ACCESS_KEY} ${CLOUDFLARE_R2_SECRET_KEY} ${CLOUDFLARE_R2_BUCKET} ${TARBALL_FULLPATH} ${TARBALL} +elif [ `echo $TARGET_BUCKET_URL | cut -f1 -d":"` == "s3" ]; then + # transfer tarball to Amazon S3 + s3_copy_file ${TARBALL_FULLPATH} ${TARGET_BUCKET_URL} +fi + +# call healthchecks url for successful backup +if [ "x${HEALTHCHECKS_URL}" != "x" ]; then + curl -fsS --retry 3 ${HEALTHCHECKS_URL} > /dev/null +fi + +# clean up working files if in cron mode +if ${CRONMODE} ; then + rm -rf ${TARGET} + rm -f ${TARBALL_FULLPATH} +fi diff --git a/bin/functions.sh b/bin/functions.sh index be5ae90..b7116cb 100755 --- a/bin/functions.sh +++ b/bin/functions.sh @@ -25,7 +25,7 @@ s3_exists() { # arguments: 1. CLOUDFLARE_ACCOUNT_ID # 2. CLOUDFLARE_R2_ACCESS_KEY # 3. CLOUDFLARE_R2_SECRET_KEY -# 4. Cloudflare R2 Bucket name CLOUDFLARE_R2_PUBLIC_BUCKET or CLOUDFLARE_R2_PRIVATE_BUCKET (ie: aiid-public) +# 4. Cloudflare R2 Bucket name (ie: aiid-public) # 5. File path for the bucket item (ie: backup-20231009233543.tar.bz2) r2_exists() { if [ $# -ne 5 ]; then return 255; fi @@ -42,7 +42,7 @@ s3_list_files() { # arguments: 1. CLOUDFLARE_ACCOUNT_ID # 2. CLOUDFLARE_R2_ACCESS_KEY # 3. CLOUDFLARE_R2_SECRET_KEY -# 4. Cloudflare R2 Bucket name CLOUDFLARE_R2_PUBLIC_BUCKET or CLOUDFLARE_R2_PRIVATE_BUCKET (ie: aiid-public) +# 4. Cloudflare R2 Bucket name (ie: aiid-public) r2_list_files() { if [ $# -ne 4 ]; then return 255; fi echo "python3 ${CLOUDFLARE_S3_CLIENT_SCRIPT} --operation list --account_id $1 --access_key $2 --secret_key $3 --bucket_name $4" @@ -59,7 +59,7 @@ s3_delete_file() { # arguments: 1. CLOUDFLARE_ACCOUNT_ID # 2. CLOUDFLARE_R2_ACCESS_KEY # 3. CLOUDFLARE_R2_SECRET_KEY -# 4. Cloudflare R2 Bucket name CLOUDFLARE_R2_PUBLIC_BUCKET or CLOUDFLARE_R2_PRIVATE_BUCKET (ie: aiid-public) +# 4. Cloudflare R2 Bucket name (ie: aiid-public) # 5. File path for the bucket item (ie: backup-20231009233543.tar.bz2) r2_delete_file() { if [ $# -ne 5 ]; then return 255; fi @@ -85,7 +85,7 @@ s3_copy_file() { # arguments: 1. CLOUDFLARE_ACCOUNT_ID # 2. CLOUDFLARE_R2_ACCESS_KEY # 3. CLOUDFLARE_R2_SECRET_KEY -# 4. Cloudflare R2 Bucket name CLOUDFLARE_R2_PUBLIC_BUCKET or CLOUDFLARE_R2_PRIVATE_BUCKET (ie: aiid-public) +# 4. Cloudflare R2 Bucket name (ie: aiid-public) # 5. File path to upload (ie: /tmp/backup-20231009233543.tar.bz2) # 6. File key for the bucket item (ie: backup-20231009233543.tar.bz2) r2_copy_file() { @@ -136,7 +136,7 @@ s3_delete_file_if_delete_backup_day() { # arguments: 1. CLOUDFLARE_ACCOUNT_ID # 2. CLOUDFLARE_R2_ACCESS_KEY # 3. CLOUDFLARE_R2_SECRET_KEY -# 4. Cloudflare R2 Bucket name CLOUDFLARE_R2_PUBLIC_BUCKET or CLOUDFLARE_R2_PRIVATE_BUCKET (ie: aiid-public) +# 4. Cloudflare R2 Bucket name (ie: aiid-public) # 5. File path for the bucket item (ie: backup-20231009233543.tar.bz2) # 6. how many days ago to be deleted # 7. divide number diff --git a/bin/list.sh b/bin/list.sh index 44d6b47..40f2a3e 100755 --- a/bin/list.sh +++ b/bin/list.sh @@ -7,22 +7,20 @@ CWD=`/usr/bin/dirname $0` cd $CWD . 
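+
+# Destination precedence: the tarball goes to Cloudflare R2 when
+# CLOUDFLARE_ACCOUNT_ID is set, otherwise to the s3:// TARGET_BUCKET_URL.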
./functions.sh
 
-for TARGET_BUCKET_URL in ${TARGET_PRIVATE_BUCKET_URL} ${TARGET_PUBLIC_BUCKET_URL}
-do
-  if [ "x${TARGET_BUCKET_URL}" != "x" ]; then
-    # output final file list
-    if [ `echo $TARGET_BUCKET_URL | cut -f1 -d":"` == "s3" ]; then
-      echo "There are files below in '${TARGET_BUCKET_URL}' S3 bucket:"
-      s3_list_files ${TARGET_BUCKET_URL}
-    fi
-  fi
-done
+if [ "${IS_PUBLIC_BACKUP}" == "true" ]; then
+  TARGET_BUCKET_URL=${TARGET_PUBLIC_BUCKET_URL}
+else
+  TARGET_BUCKET_URL=${TARGET_PRIVATE_BUCKET_URL}
+fi
+
+# output S3 bucket file list
+if [ `echo $TARGET_BUCKET_URL | cut -f1 -d":"` == "s3" ]; then
+  echo "There are files below in '${TARGET_BUCKET_URL}' S3 bucket:"
+  s3_list_files ${TARGET_BUCKET_URL}
+fi
 
 # output Cloudflare R2 account bucket file list
 if [ "x${CLOUDFLARE_ACCOUNT_ID}" != "x" ]; then
-  echo "There are files below in '${CLOUDFLARE_R2_PUBLIC_BUCKET}' R2 bucket:"
-  r2_list_files ${CLOUDFLARE_ACCOUNT_ID} ${CLOUDFLARE_R2_ACCESS_KEY} ${CLOUDFLARE_R2_SECRET_KEY} ${CLOUDFLARE_R2_PUBLIC_BUCKET}
-
-  echo "There are files below in '${CLOUDFLARE_R2_PRIVATE_BUCKET}' R2 bucket:"
-  r2_list_files ${CLOUDFLARE_ACCOUNT_ID} ${CLOUDFLARE_R2_ACCESS_KEY} ${CLOUDFLARE_R2_SECRET_KEY} ${CLOUDFLARE_R2_PRIVATE_BUCKET}
+  echo "There are files below in '${CLOUDFLARE_R2_BUCKET}' R2 bucket:"
+  r2_list_files ${CLOUDFLARE_ACCOUNT_ID} ${CLOUDFLARE_R2_ACCESS_KEY} ${CLOUDFLARE_R2_SECRET_KEY} ${CLOUDFLARE_R2_BUCKET}
 fi
\ No newline at end of file
diff --git a/bin/prune.sh b/bin/prune.sh
index a9d12c9..1d03175 100755
--- a/bin/prune.sh
+++ b/bin/prune.sh
@@ -4,9 +4,13 @@ echo "Starting prune.sh execution..."
 
 # settings
 BACKUPFILE_PREFIX=${BACKUPFILE_PREFIX:-backup}
-
 DELETE_DEVIDE=${DELETE_DEVIDE:-3}
 DELETE_TARGET_DAYS_LEFT=${DELETE_TARGET_DAYS_LEFT:-4}
+if [ "${IS_PUBLIC_BACKUP}" == "true" ]; then
+  TARGET_BUCKET_URL=${TARGET_PUBLIC_BUCKET_URL}
+else
+  TARGET_BUCKET_URL=${TARGET_PRIVATE_BUCKET_URL}
+fi
 
 # start script
 CWD=`/usr/bin/dirname $0`
@@ -20,17 +24,11 @@ PAST=`create_past_yyyymmdd ${DELETE_TARGET_DAYS_LEFT}`
 TARBALL_PAST="${BACKUPFILE_PREFIX}-${PAST}.tar.bz2"
 
 if [ "x${CLOUDFLARE_ACCOUNT_ID}" != "x" ]; then
-  echo "pruning Cloudflare R2 account bucket ${CLOUDFLARE_R2_PUBLIC_BUCKET}"
-  r2_delete_file_if_delete_backup_day ${CLOUDFLARE_ACCOUNT_ID} ${CLOUDFLARE_R2_ACCESS_KEY} ${CLOUDFLARE_R2_SECRET_KEY} ${CLOUDFLARE_R2_PUBLIC_BUCKET} ${TARBALL_PAST} ${DELETE_TARGET_DAYS_LEFT} ${DELETE_DEVIDE}
-
-  echo "pruning Cloudflare R2 account bucket ${CLOUDFLARE_R2_PRIVATE_BUCKET}"
-  r2_delete_file_if_delete_backup_day ${CLOUDFLARE_ACCOUNT_ID} ${CLOUDFLARE_R2_ACCESS_KEY} ${CLOUDFLARE_R2_SECRET_KEY} ${CLOUDFLARE_R2_PRIVATE_BUCKET} ${TARBALL_PAST} ${DELETE_TARGET_DAYS_LEFT} ${DELETE_DEVIDE}
+  echo "pruning Cloudflare R2 account bucket ${CLOUDFLARE_R2_BUCKET} ..."
+  r2_delete_file_if_delete_backup_day ${CLOUDFLARE_ACCOUNT_ID} ${CLOUDFLARE_R2_ACCESS_KEY} ${CLOUDFLARE_R2_SECRET_KEY} ${CLOUDFLARE_R2_BUCKET} ${TARBALL_PAST} ${DELETE_TARGET_DAYS_LEFT} ${DELETE_DEVIDE}
 fi
 
-for TARGET_BUCKET_URL in ${TARGET_PRIVATE_BUCKET_URL} ${TARGET_PUBLIC_BUCKET_URL}
-do
-  echo "pruning ${TARGET_BUCKET_URL}"
-  if [ `echo $TARGET_BUCKET_URL | cut -f1 -d":"` == "s3" ]; then
-    s3_delete_file_if_delete_backup_day ${TARGET_BUCKET_URL}${TARBALL_PAST} ${DELETE_TARGET_DAYS_LEFT} ${DELETE_DEVIDE}
-  fi
-done
\ No newline at end of file
+if [ `echo $TARGET_BUCKET_URL | cut -f1 -d":"` == "s3" ]; then
+  echo "pruning S3 bucket ${TARGET_BUCKET_URL} ..."
+  s3_delete_file_if_delete_backup_day ${TARGET_BUCKET_URL}${TARBALL_PAST} ${DELETE_TARGET_DAYS_LEFT} ${DELETE_DEVIDE}
0000000..53053a5 --- /dev/null +++ b/.github/workflows/run_public_backup.yml @@ -0,0 +1,73 @@ +name: Public backup to the cloud + +on: + push: + branches: + - master + schedule: + - cron: "0 10 * * 1" # At 10:00 on Monday. + workflow_dispatch: + inputs: + home: + description: 'This can be triggered from the GH page' + required: false + default: 'This is not used' + +env: + AWS_REGION: us-east-1 # set this to your preferred AWS region, e.g. us-west-1 + +defaults: + run: + shell: bash + +jobs: + build-and-run-backups: + name: Backup + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v2 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Build Docker image + uses: docker/build-push-action@v5 + with: + context: . + file: ./Dockerfile + load: true + tags: aiid-docker-backup:latest + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-access-key-id: ${{ secrets.AWS_S3_BACKUP_USER_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_S3_BACKUP_USER_SECRET_ACCESS_KEY }} + aws-region: ${{ env.AWS_REGION }} + + - name: Generate public backup + run: | + docker run --rm \ + -e IS_PUBLIC_BACKUP="true" \ + -e AWS_ACCESS_KEY_ID="${{ secrets.AWS_S3_BACKUP_USER_ACCESS_KEY_ID }}" \ + -e AWS_SECRET_ACCESS_KEY="${{ secrets.AWS_S3_BACKUP_USER_SECRET_ACCESS_KEY }}" \ + -e TARGET_PRIVATE_BUCKET_URL="${{ vars.AWS_S3_PRIVATE_BUCKET }}" \ + -e TARGET_PUBLIC_BUCKET_URL="${{ vars.AWS_S3_PUBLIC_BUCKET }}" \ + -e CLOUDFLARE_ACCOUNT_ID="${{ vars.CLOUDFLARE_ACCOUNT_ID }}" \ + -e CLOUDFLARE_R2_ACCESS_KEY="${{ secrets.CLOUDFLARE_R2_ACCESS_KEY }}" \ + -e CLOUDFLARE_R2_SECRET_KEY="${{ secrets.CLOUDFLARE_R2_SECRET_KEY }}" \ + -e CLOUDFLARE_R2_BUCKET="${{ vars.CLOUDFLARE_R2_PUBLIC_BUCKET }}" \ + -e BACKUPFILE_PREFIX="backup" \ + -e MONGODB_HOST="mongo" \ + -e MONGODB_URI="mongodb+srv://${{ secrets.MONGO_BACKUP_USER }}:${{ secrets.MONGO_BACKUP_USER_PASSWORD }}@aiiddev-aqdmh.gcp.mongodb.net/aiidprod" \ + -e MONGODB_DBNAME="aiidprod" \ + -e MONGODB_URI_TRANSLATIONS="mongodb+srv://${{ secrets.MONGO_BACKUP_USER }}:${{ secrets.MONGO_BACKUP_USER_PASSWORD }}@aiiddev-aqdmh.gcp.mongodb.net/translations" \ + -e MONGODB_DBNAME_TRANSLATIONS="translations" \ + -e MONGODB_USERNAME="${{ secrets.MONGO_BACKUP_USER }}" \ + -e MONGODB_PASSWORD="${{ secrets.MONGO_BACKUP_USER_PASSWORD }}" \ + -e MONGODB_AUTHDB="admin" \ + -v ~:/mab \ + aiid-docker-backup:latest + From 5bd75ce34d615234900643f08bca991739492685 Mon Sep 17 00:00:00 2001 From: Pablo Costa Date: Thu, 2 Nov 2023 19:56:04 -0300 Subject: [PATCH 15/21] Delete .github/workflows/run_backup.yml --- .github/workflows/run_backup.yml | 75 -------------------------------- 1 file changed, 75 deletions(-) delete mode 100644 .github/workflows/run_backup.yml diff --git a/.github/workflows/run_backup.yml b/.github/workflows/run_backup.yml deleted file mode 100644 index 3ba2b78..0000000 --- a/.github/workflows/run_backup.yml +++ /dev/null @@ -1,75 +0,0 @@ -name: Backup to the cloud - -on: - push: - branches: - - master - - staging - - classifications-csv - schedule: - - cron: "0 10 * * 1" # At 10:00 on Monday. - workflow_dispatch: - inputs: - home: - description: 'This can be triggered from the GH page' - required: false - default: 'This is not used' - -env: - AWS_REGION: us-east-1 # set this to your preferred AWS region, e.g. 
us-west-1 - -defaults: - run: - shell: bash - -jobs: - build-and-run-backups: - name: Backup - runs-on: ubuntu-latest - - steps: - - name: Checkout - uses: actions/checkout@v2 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Build Docker image - uses: docker/build-push-action@v5 - with: - context: . - file: ./Dockerfile - load: true - tags: aiid-docker-backup:latest - - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - aws-access-key-id: ${{ secrets.AWS_S3_BACKUP_USER_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_S3_BACKUP_USER_SECRET_ACCESS_KEY }} - aws-region: ${{ env.AWS_REGION }} - - - name: Generate public and private backups - run: | - docker run --rm \ - -e AWS_ACCESS_KEY_ID="${{ secrets.AWS_S3_BACKUP_USER_ACCESS_KEY_ID }}" \ - -e AWS_SECRET_ACCESS_KEY="${{ secrets.AWS_S3_BACKUP_USER_SECRET_ACCESS_KEY }}" \ - -e TARGET_PRIVATE_BUCKET_URL="${{ vars.AWS_S3_PRIVATE_BUCKET }}" \ - -e TARGET_PUBLIC_BUCKET_URL="${{ vars.AWS_S3_PUBLIC_BUCKET }}" \ - -e CLOUDFLARE_ACCOUNT_ID="${{ vars.CLOUDFLARE_ACCOUNT_ID }}" \ - -e CLOUDFLARE_R2_ACCESS_KEY="${{ secrets.CLOUDFLARE_R2_ACCESS_KEY }}" \ - -e CLOUDFLARE_R2_SECRET_KEY="${{ secrets.CLOUDFLARE_R2_SECRET_KEY }}" \ - -e CLOUDFLARE_R2_PUBLIC_BUCKET="${{ vars.CLOUDFLARE_R2_PUBLIC_BUCKET }}" \ - -e CLOUDFLARE_R2_PRIVATE_BUCKET="${{ vars.CLOUDFLARE_R2_PRIVATE_BUCKET }}" \ - -e BACKUPFILE_PREFIX="backup" \ - -e MONGODB_HOST="mongo" \ - -e MONGODB_URI="mongodb+srv://${{ secrets.MONGO_BACKUP_USER }}:${{ secrets.MONGO_BACKUP_USER_PASSWORD }}@aiiddev-aqdmh.gcp.mongodb.net/aiidprod" \ - -e MONGODB_DBNAME="aiidprod" \ - -e MONGODB_URI_TRANSLATIONS="mongodb+srv://${{ secrets.MONGO_BACKUP_USER }}:${{ secrets.MONGO_BACKUP_USER_PASSWORD }}@aiiddev-aqdmh.gcp.mongodb.net/translations" \ - -e MONGODB_DBNAME_TRANSLATIONS="translations" \ - -e MONGODB_USERNAME="${{ secrets.MONGO_BACKUP_USER }}" \ - -e MONGODB_PASSWORD="${{ secrets.MONGO_BACKUP_USER_PASSWORD }}" \ - -e MONGODB_AUTHDB="admin" \ - -v ~:/mab \ - aiid-docker-backup:latest - From 920af7196fb00eb030bdfe8bbe944a48ad71aa3b Mon Sep 17 00:00:00 2001 From: Pablo Costa Date: Thu, 2 Nov 2023 20:11:07 -0300 Subject: [PATCH 16/21] Add logic to private and public backups on `backup.sh` file --- bin/backup.sh | 55 ++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 46 insertions(+), 9 deletions(-) diff --git a/bin/backup.sh b/bin/backup.sh index 1c90b30..f95f548 100755 --- a/bin/backup.sh +++ b/bin/backup.sh @@ -59,15 +59,52 @@ if [ "x${CLOUDFLARE_ACCOUNT_ID}" != "x" ]; then fi fi -# dump databases -MONGODUMP_OPTS="--uri=${MONGODB_URI} ${MONGODUMP_OPTS}" -echo "dump MongoDB aiidprod to the local filesystem..." -mongodump -o ${TARGET} ${MONGODUMP_OPTS} - -# Dump Translations database -MONGODUMP_OPTS_TRANSLATIONS="--uri=${MONGODB_URI_TRANSLATIONS}" -echo "dump MongoDB translations to the local filesystem..." -mongodump -o ${TARGET} ${MONGODUMP_OPTS_TRANSLATIONS} +if [ "${IS_PUBLIC_BACKUP}" == "true" ]; then + echo "Dump MongoDB 'aiidprod' database..." + mongodump -o ${TARGET} --uri=${MONGODB_URI} --excludeCollection=classifications + echo "Dump filtered 'classifications' collection..." + mongodump -o ${TARGET} --uri=${MONGODB_URI} --collection=classifications --query='{ "publish": true }' + + echo "Dump MongoDB 'translations' database..." + mongodump -o ${TARGET} --uri=${MONGODB_URI_TRANSLATIONS} + + echo "Export collections as CSV files..." 
+ mongoexport -o ${TARGET}/incidents.csv --uri=${MONGODB_URI} -v --type=csv --collection=incidents --fields=_id,incident_id,date,reports,Alleged\ deployer\ of\ AI\ system,Alleged\ developer\ of\ AI\ system,Alleged\ harmed\ or\ nearly\ harmed\ parties,description,title + mongoexport -o ${TARGET}/duplicates.csv --uri=${MONGODB_URI} -v --type=csv --collection=duplicates --fields=duplicate_incident_number,true_incident_number + mongoexport -o ${TARGET}/quickadd.csv --uri=${MONGODB_URI} -v --type=csv --collection=quickadd --fields=incident_id,url,date_submitted,source_domain + mongoexport -o ${TARGET}/submissions.csv --uri=${MONGODB_URI} -v --type=csv --collection=submissions --fields=authors,date_downloaded,date_modified,date_published,date_submitted,image_url,incident_date,incident_id,language,mongodb_id,source_domain,submitters,text,title,url + mongoexport -o ${TARGET}/reports.csv --uri=${MONGODB_URI} -v --type=csv --collection=reports --fields=_id,incident_id,authors,date_downloaded,date_modified,date_published,date_submitted,description,epoch_date_downloaded,epoch_date_modified,epoch_date_published,epoch_date_submitted,image_url,language,ref_number,report_number,source_domain,submitters,text,title,url,tags + + # Taxa CSV Export + + # Get the field names + mongoexport -o classifications_cset_headers.csv --uri=${MONGODB_URI} -v --type=csv --query='{ "namespace": "CSET", "publish": true }' --collection=classifications --noHeaderLine --fields='attributes.0.short_name,attributes.1.short_name,attributes.2.short_name,attributes.3.short_name,attributes.4.short_name,attributes.5.short_name,attributes.6.short_name,attributes.7.short_name,attributes.8.short_name,attributes.9.short_name,attributes.10.short_name,attributes.11.short_name,attributes.12.short_name,attributes.13.short_name,attributes.14.short_name,attributes.15.short_name,attributes.16.short_name,attributes.17.short_name,attributes.18.short_name,attributes.19.short_name,attributes.20.short_name,attributes.21.short_name,attributes.22.short_name,attributes.23.short_name,attributes.24.short_name,attributes.25.short_name,attributes.26.short_name,attributes.27.short_name,attributes.28.short_name,attributes.29.short_name,attributes.30.short_name,attributes.31.short_name' + + # Get the values + mongoexport -o classifications_cset_values.csv --uri=${MONGODB_URI} -v --query='{ "namespace": "CSET", "publish": true }' --type=csv --collection=classifications --noHeaderLine --fields='_id,incident_id,namespace,publish,attributes.0.value_json,attributes.1.value_json,attributes.2.value_json,attributes.3.value_json,attributes.4.value_json,attributes.5.value_json,attributes.6.value_json,attributes.7.value_json,attributes.8.value_json,attributes.9.value_json,attributes.10.value_json,attributes.11.value_json,attributes.12.value_json,attributes.13.value_json,attributes.14.value_json,attributes.15.value_json,attributes.16.value_json,attributes.17.value_json,attributes.18.value_json,attributes.19.value_json,attributes.20.value_json,attributes.21.value_json,attributes.22.value_json,attributes.23.value_json,attributes.24.value_json,attributes.25.value_json,attributes.26.value_json,attributes.27.value_json,attributes.28.value_json,attributes.29.value_json,attributes.30.value_json,attributes.31.value_json' + + # Construct the header + echo -n "_id,incident_id,namespace,publish," > tmp.csv + head -n 1 classifications_cset_headers.csv > tmp_header.csv + cat tmp.csv tmp_header.csv > header.csv + + # Concat the header and the values to the output + cat header.csv 
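+  # (spaces in exported field names are backslash-escaped so that each
+  # --fields list reaches mongoexport as a single shell argument)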
classifications_cset_values.csv > ${TARGET}/classifications_cset.csv + + # Cleanup + rm tmp.csv + rm tmp_header.csv + rm header.csv + rm classifications_cset_headers.csv + rm classifications_cset_values.csv + +else # Private backup + echo "Dump MongoDB 'aiidprod' database..." + mongodump -o ${TARGET} --uri=${MONGODB_URI} + + echo "Dump MongoDB 'translations' database..." + mongodump -o ${TARGET} --uri=${MONGODB_URI_TRANSLATIONS} +fi echo "Report contents are subject to their own intellectual property rights. Unless otherwise noted, the database is shared under (CC BY-SA 4.0). See: https://creativecommons.org/licenses/by-sa/4.0/" > ${TARGET}/license.txt From 1a1fd46ee72806eec49ea3658596d3b934dbad4c Mon Sep 17 00:00:00 2001 From: Pablo Costa Date: Thu, 2 Nov 2023 20:18:16 -0300 Subject: [PATCH 17/21] Remove unnecessary `ls` commands --- bin/backup.sh | 4 ---- 1 file changed, 4 deletions(-) diff --git a/bin/backup.sh b/bin/backup.sh index f95f548..521a109 100755 --- a/bin/backup.sh +++ b/bin/backup.sh @@ -108,10 +108,6 @@ fi echo "Report contents are subject to their own intellectual property rights. Unless otherwise noted, the database is shared under (CC BY-SA 4.0). See: https://creativecommons.org/licenses/by-sa/4.0/" > ${TARGET}/license.txt -ls -lah -echo "---" -ls -lah ${TARGET} - # run tar command echo "Start backup ${TARGET} into ${TARGET_BUCKET_URL} ..." time ${TAR_CMD} ${TAR_OPTS} ${TARBALL_FULLPATH} -C ${DIRNAME} ${BASENAME} From b8b9d9fd573709d8270b4ff76c806ec8ee7b7538 Mon Sep 17 00:00:00 2001 From: Pablo Costa Date: Thu, 2 Nov 2023 20:26:51 -0300 Subject: [PATCH 18/21] Upgrade to actions/checkout@v4 --- .github/workflows/run_public_backup.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_public_backup.yml b/.github/workflows/run_public_backup.yml index 53053a5..489a184 100644 --- a/.github/workflows/run_public_backup.yml +++ b/.github/workflows/run_public_backup.yml @@ -27,7 +27,7 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 From b7fc298e815e204ae1a563da42f2c3a775e57dd8 Mon Sep 17 00:00:00 2001 From: Pablo Costa Date: Thu, 2 Nov 2023 20:30:50 -0300 Subject: [PATCH 19/21] Upgrade to actions/checkout@v4 --- .github/workflows/run_private_backup.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_private_backup.yml b/.github/workflows/run_private_backup.yml index af75b71..0a24ab5 100644 --- a/.github/workflows/run_private_backup.yml +++ b/.github/workflows/run_private_backup.yml @@ -27,7 +27,7 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 From 9b5f7673b797fc5a8479ee889d75660db6f125a6 Mon Sep 17 00:00:00 2001 From: Pablo Costa Date: Thu, 2 Nov 2023 20:50:13 -0300 Subject: [PATCH 20/21] Delete individual backup files --- bin/backup_filtered_data.sh | 126 ------------------------------------ bin/backup_full_snapshot.sh | 95 --------------------------- 2 files changed, 221 deletions(-) delete mode 100755 bin/backup_filtered_data.sh delete mode 100755 bin/backup_full_snapshot.sh diff --git a/bin/backup_filtered_data.sh b/bin/backup_filtered_data.sh deleted file mode 100755 index 640d55c..0000000 --- a/bin/backup_filtered_data.sh +++ /dev/null @@ -1,126 +0,0 @@ -#!/bin/bash -e - -echo "Starting backup_csv_data.sh script execution..." 
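+  # (the export runs in two passes because the human-readable column names
+  # live in attributes.N.short_name while the data lives in
+  # attributes.N.value_json; the synthesized header is prepended to the
+  # headerless value rows)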
- -# settings -BACKUPFILE_PREFIX=${BACKUPFILE_PREFIX:-backup} -MONGODB_HOST=${MONGODB_HOST:-mongo} -CRONMODE=${CRONMODE:-false} -TARGET_BUCKET_URL=${TARGET_PUBLIC_BUCKET_URL} -CLOUDFLARE_ACCOUNT_ID=${CLOUDFLARE_ACCOUNT_ID} - -# start script -CWD=`/usr/bin/dirname $0` -cd $CWD - -. ./functions.sh -NOW=`create_current_yyyymmddhhmmss` - -echo "=== $0 started at `/bin/date "+%Y/%m/%d %H:%M:%S"` ===" - -TMPDIR="/tmp" -TARGET_DIRNAME="mongodump_filtered_data" -TARGET="${TMPDIR}/${TARGET_DIRNAME}" -TAR_CMD="/bin/tar" -TAR_OPTS="jcvf" - -DIRNAME=`/usr/bin/dirname ${TARGET}` -BASENAME=`/usr/bin/basename ${TARGET}` -TARBALL="${BACKUPFILE_PREFIX}-${NOW}.tar.bz2" -TARBALL_FULLPATH="${TMPDIR}/${TARBALL}" - -# check parameters -# deprecate the old option -if [ "x${S3_TARGET_BUCKET_URL}" != "x" ]; then - echo "WARNING: The environment variable S3_TARGET_BUCKET_URL is deprecated. Please use TARGET_BUCKET_URL instead." - TARGET_BUCKET_URL=$S3_TARGET_BUCKET_URL -fi -if [ "x${TARGET_BUCKET_URL}${CLOUDFLARE_ACCOUNT_ID}" == "x" ]; then - echo "ERROR: At least one of the environment variables TARGET_BUCKET_URL or CLOUDFLARE_ACCOUNT_ID must be specified." 1>&2 - exit 1 -fi -if [ "x${CLOUDFLARE_ACCOUNT_ID}" != "x" ]; then - if [ -z "${CLOUDFLARE_R2_ACCESS_KEY}" ]; then - echo "ERROR: If CLOUDFLARE_ACCOUNT_ID environment variable is defined, you have to define the CLOUDFLARE_R2_ACCESS_KEY as well" 1>&2 - exit 1 - fi - if [ -z "${CLOUDFLARE_R2_SECRET_KEY}" ]; then - echo "ERROR: If CLOUDFLARE_ACCOUNT_ID environment variable is defined, you have to define the CLOUDFLARE_R2_SECRET_KEY as well" 1>&2 - exit 1 - fi - if [ -z "${CLOUDFLARE_R2_PUBLIC_BUCKET}" ]; then - echo "ERROR: If CLOUDFLARE_ACCOUNT_ID environment variable is defined, you have to define the CLOUDFLARE_R2_PUBLIC_BUCKET as well" 1>&2 - exit 1 - fi -fi - -# dump databases -echo "Dump MongoDB aiidprod..." -mongodump -o ${TARGET} --uri=${MONGODB_URI} --excludeCollection=classifications -echo "Dump filtered 'classifications' collection..." -mongodump -o ${TARGET} --uri=${MONGODB_URI} --collection=classifications --query='{ "publish": true }' - -# Dump Translations database -echo "Dump MongoDB translations..." -mongodump -o ${TARGET} --uri=${MONGODB_URI_TRANSLATIONS} - -# CSV Export -echo "Export collections as CSV files..." 
-mongoexport -o ${TARGET}/incidents.csv --uri=${MONGODB_URI} -v --type=csv --collection=incidents --fields=_id,incident_id,date,reports,Alleged\ deployer\ of\ AI\ system,Alleged\ developer\ of\ AI\ system,Alleged\ harmed\ or\ nearly\ harmed\ parties,description,title -mongoexport -o ${TARGET}/duplicates.csv --uri=${MONGODB_URI} -v --type=csv --collection=duplicates --fields=duplicate_incident_number,true_incident_number -mongoexport -o ${TARGET}/quickadd.csv --uri=${MONGODB_URI} -v --type=csv --collection=quickadd --fields=incident_id,url,date_submitted,source_domain -mongoexport -o ${TARGET}/submissions.csv --uri=${MONGODB_URI} -v --type=csv --collection=submissions --fields=authors,date_downloaded,date_modified,date_published,date_submitted,image_url,incident_date,incident_id,language,mongodb_id,source_domain,submitters,text,title,url -mongoexport -o ${TARGET}/reports.csv --uri=${MONGODB_URI} -v --type=csv --collection=reports --fields=_id,incident_id,authors,date_downloaded,date_modified,date_published,date_submitted,description,epoch_date_downloaded,epoch_date_modified,epoch_date_published,epoch_date_submitted,image_url,language,ref_number,report_number,source_domain,submitters,text,title,url,tags - -# Taxa CSV Export - -# Get the field names -mongoexport -o classifications_cset_headers.csv --uri=${MONGODB_URI} -v --type=csv --query='{ "namespace": "CSET", "publish": true }' --collection=classifications --noHeaderLine --fields='attributes.0.short_name,attributes.1.short_name,attributes.2.short_name,attributes.3.short_name,attributes.4.short_name,attributes.5.short_name,attributes.6.short_name,attributes.7.short_name,attributes.8.short_name,attributes.9.short_name,attributes.10.short_name,attributes.11.short_name,attributes.12.short_name,attributes.13.short_name,attributes.14.short_name,attributes.15.short_name,attributes.16.short_name,attributes.17.short_name,attributes.18.short_name,attributes.19.short_name,attributes.20.short_name,attributes.21.short_name,attributes.22.short_name,attributes.23.short_name,attributes.24.short_name,attributes.25.short_name,attributes.26.short_name,attributes.27.short_name,attributes.28.short_name,attributes.29.short_name,attributes.30.short_name,attributes.31.short_name' - -# Get the values -mongoexport -o classifications_cset_values.csv --uri=${MONGODB_URI} -v --query='{ "namespace": "CSET", "publish": true }' --type=csv --collection=classifications --noHeaderLine --fields='_id,incident_id,namespace,publish,attributes.0.value_json,attributes.1.value_json,attributes.2.value_json,attributes.3.value_json,attributes.4.value_json,attributes.5.value_json,attributes.6.value_json,attributes.7.value_json,attributes.8.value_json,attributes.9.value_json,attributes.10.value_json,attributes.11.value_json,attributes.12.value_json,attributes.13.value_json,attributes.14.value_json,attributes.15.value_json,attributes.16.value_json,attributes.17.value_json,attributes.18.value_json,attributes.19.value_json,attributes.20.value_json,attributes.21.value_json,attributes.22.value_json,attributes.23.value_json,attributes.24.value_json,attributes.25.value_json,attributes.26.value_json,attributes.27.value_json,attributes.28.value_json,attributes.29.value_json,attributes.30.value_json,attributes.31.value_json' - -# Construct the header -echo -n "_id,incident_id,namespace,publish," > tmp.csv -head -n 1 classifications_cset_headers.csv > tmp_header.csv -cat tmp.csv tmp_header.csv > header.csv - -# Concat the header and the values to the output -cat header.csv 
classifications_cset_values.csv > ${TARGET}/classifications_cset.csv - -# Cleanup -rm tmp.csv -rm tmp_header.csv -rm header.csv -rm classifications_cset_headers.csv -rm classifications_cset_values.csv - -echo "Report contents are subject to their own intellectual property rights. Unless otherwise noted, the database is shared under (CC BY-SA 4.0). See: https://creativecommons.org/licenses/by-sa/4.0/" > ${TARGET}/license.txt - -ls -lah -echo "---" -ls -lah ${TARGET} - -# run tar command -echo "Start backup ${TARGET} into ${TARGET_BUCKET_URL} ..." -time ${TAR_CMD} ${TAR_OPTS} ${TARBALL_FULLPATH} -C ${DIRNAME} ${BASENAME} - -if [ "x${CLOUDFLARE_ACCOUNT_ID}" != "x" ]; then - # upload tarball to Cloudflare R2 - r2_copy_file ${CLOUDFLARE_ACCOUNT_ID} ${CLOUDFLARE_R2_ACCESS_KEY} ${CLOUDFLARE_R2_SECRET_KEY} ${CLOUDFLARE_R2_PUBLIC_BUCKET} ${TARBALL_FULLPATH} ${TARBALL} -elif [ `echo $TARGET_BUCKET_URL | cut -f1 -d":"` == "s3" ]; then - # transfer tarball to Amazon S3 - s3_copy_file ${TARBALL_FULLPATH} ${TARGET_BUCKET_URL} -fi - -# call healthchecks url for successful backup -if [ "x${HEALTHCHECKS_URL}" != "x" ]; then - curl -fsS --retry 3 ${HEALTHCHECKS_URL} > /dev/null -fi - -# clean up working files if in cron mode -if ${CRONMODE} ; then - rm -rf ${TARGET} - rm -f ${TARBALL_FULLPATH} -fi - diff --git a/bin/backup_full_snapshot.sh b/bin/backup_full_snapshot.sh deleted file mode 100755 index 5e68e24..0000000 --- a/bin/backup_full_snapshot.sh +++ /dev/null @@ -1,95 +0,0 @@ -#!/bin/bash -e - -echo "Starting backup_snapshot.sh script execution..." - -# settings -BACKUPFILE_PREFIX=${BACKUPFILE_PREFIX:-backup} -MONGODB_HOST=${MONGODB_HOST:-mongo} -CRONMODE=${CRONMODE:-false} -TARGET_BUCKET_URL=${TARGET_PRIVATE_BUCKET_URL} -CLOUDFLARE_ACCOUNT_ID=${CLOUDFLARE_ACCOUNT_ID} - -# start script -CWD=`/usr/bin/dirname $0` -cd $CWD - -. ./functions.sh -NOW=`create_current_yyyymmddhhmmss` - -echo "=== $0 started at `/bin/date "+%Y/%m/%d %H:%M:%S"` ===" - -TMPDIR="/tmp" -TARGET_DIRNAME="mongodump_full_snapshot" -TARGET="${TMPDIR}/${TARGET_DIRNAME}" -TAR_CMD="/bin/tar" -TAR_OPTS="jcvf" - -DIRNAME=`/usr/bin/dirname ${TARGET}` -BASENAME=`/usr/bin/basename ${TARGET}` -TARBALL="${BACKUPFILE_PREFIX}-${NOW}.tar.bz2" -TARBALL_FULLPATH="${TMPDIR}/${TARBALL}" - - -# check parameters -# deprecate the old option -if [ "x${S3_TARGET_BUCKET_URL}" != "x" ]; then - echo "WARNING: The environment variable S3_TARGET_BUCKET_URL is deprecated. Please use TARGET_BUCKET_URL instead." - TARGET_BUCKET_URL=$S3_TARGET_BUCKET_URL -fi -if [ "x${TARGET_BUCKET_URL}${CLOUDFLARE_ACCOUNT_ID}" == "x" ]; then - echo "ERROR: At least one of the environment variables TARGET_BUCKET_URL or CLOUDFLARE_ACCOUNT_ID must be specified." 1>&2 - exit 1 -fi -if [ "x${CLOUDFLARE_ACCOUNT_ID}" != "x" ]; then - if [ -z "${CLOUDFLARE_R2_ACCESS_KEY}" ]; then - echo "ERROR: If CLOUDFLARE_ACCOUNT_ID environment variable is defined, you have to define the CLOUDFLARE_R2_ACCESS_KEY as well" 1>&2 - exit 1 - fi - if [ -z "${CLOUDFLARE_R2_SECRET_KEY}" ]; then - echo "ERROR: If CLOUDFLARE_ACCOUNT_ID environment variable is defined, you have to define the CLOUDFLARE_R2_SECRET_KEY as well" 1>&2 - exit 1 - fi - if [ -z "${CLOUDFLARE_R2_PRIVATE_BUCKET}" ]; then - echo "ERROR: If CLOUDFLARE_ACCOUNT_ID environment variable is defined, you have to define the CLOUDFLARE_R2_PRIVATE_BUCKET as well" 1>&2 - exit 1 - fi -fi - -# dump databases -MONGODUMP_OPTS="--uri=${MONGODB_URI} ${MONGODUMP_OPTS}" -echo "dump MongoDB aiidprod to the local filesystem..." 
-mongodump -o ${TARGET} ${MONGODUMP_OPTS} - -# Dump Translations database -MONGODUMP_OPTS_TRANSLATIONS="--uri=${MONGODB_URI_TRANSLATIONS}" -echo "dump MongoDB translations to the local filesystem..." -mongodump -o ${TARGET} ${MONGODUMP_OPTS_TRANSLATIONS} - -echo "Report contents are subject to their own intellectual property rights. Unless otherwise noted, the database is shared under (CC BY-SA 4.0). See: https://creativecommons.org/licenses/by-sa/4.0/" > ${TARGET}/license.txt - -ls -lah -echo "---" -ls -lah ${TARGET} - -# run tar command -echo "start backup ${TARGET} into ${TARGET_BUCKET_URL} ..." -time ${TAR_CMD} ${TAR_OPTS} ${TARBALL_FULLPATH} -C ${DIRNAME} ${BASENAME} - -if [ "x${CLOUDFLARE_ACCOUNT_ID}" != "x" ]; then - # upload tarball to Cloudflare R2 - r2_copy_file ${CLOUDFLARE_ACCOUNT_ID} ${CLOUDFLARE_R2_ACCESS_KEY} ${CLOUDFLARE_R2_SECRET_KEY} ${CLOUDFLARE_R2_PRIVATE_BUCKET} ${TARBALL_FULLPATH} ${TARBALL} -elif [ `echo $TARGET_BUCKET_URL | cut -f1 -d":"` == "s3" ]; then - # transfer tarball to Amazon S3 - s3_copy_file ${TARBALL_FULLPATH} ${TARGET_BUCKET_URL} -fi - -# call healthchecks url for successful backup -if [ "x${HEALTHCHECKS_URL}" != "x" ]; then - curl -fsS --retry 3 ${HEALTHCHECKS_URL} > /dev/null -fi - -# clean up working files if in cron mode -if ${CRONMODE} ; then - rm -rf ${TARGET} - rm -f ${TARBALL_FULLPATH} -fi From 610f434d504e5639277b955e34c5a38549c8ab65 Mon Sep 17 00:00:00 2001 From: Pablo Costa Date: Wed, 15 Nov 2023 14:00:14 -0300 Subject: [PATCH 21/21] Fix IS_PUBLIC_BACKUP=false on private backup --- .github/workflows/run_private_backup.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_private_backup.yml b/.github/workflows/run_private_backup.yml index 0a24ab5..b626806 100644 --- a/.github/workflows/run_private_backup.yml +++ b/.github/workflows/run_private_backup.yml @@ -50,7 +50,7 @@ jobs: - name: Generate private backup run: | docker run --rm \ - -e IS_PUBLIC_BACKUP="true" \ + -e IS_PUBLIC_BACKUP="false" \ -e AWS_ACCESS_KEY_ID="${{ secrets.AWS_S3_BACKUP_USER_ACCESS_KEY_ID }}" \ -e AWS_SECRET_ACCESS_KEY="${{ secrets.AWS_S3_BACKUP_USER_SECRET_ACCESS_KEY }}" \ -e TARGET_PRIVATE_BUCKET_URL="${{ vars.AWS_S3_PRIVATE_BUCKET }}" \