"""Helpers used by the idc-index GitHub Actions workflows.

The module compares the IDC release referenced by a public BigQuery view with
the version hard-coded in ``idc_index/index.py``, rewrites the version and the
SQL queries when a newer release exists, and runs the queries to produce the
zipped CSV index.
"""
import os
import re
import sys
import uuid

# Source line that immediately precedes the version string in index.py.
_VERSION_MARKER = "def get_idc_version(self):"


class IDCIndexManager:
    """Keep the idc-index version, SQL queries and CSV index in sync with IDC."""

    def __init__(self, project_id):
        """Create a BigQuery client bound to *project_id*."""
        print("Initializing IDCIndexManager...")
        # Imported here rather than at module level so that the helpers which
        # only rewrite local files stay importable without the BigQuery SDK.
        from google.cloud import bigquery

        self.project_id = project_id
        self.client = bigquery.Client(project=project_id)

    @staticmethod
    def _sql_file_paths(directory):
        """Return the paths of the ``.sql`` files in *directory*, sorted by name."""
        # os.listdir order is arbitrary; sorting keeps runs reproducible.
        return [
            os.path.join(directory, file_name)
            for file_name in sorted(os.listdir(directory))
            if file_name.endswith(".sql")
        ]

    def get_latest_idc_release_version(self, view_id):
        """Return the IDC release number referenced by the view *view_id*.

        The number is parsed from the first ``idc_v<N>`` dataset reference in
        the view's SQL.

        Raises:
            ValueError: if the table is not a view or its query contains no
                ``idc_v<N>`` reference.
        """
        print("Getting latest IDC release version...")
        view = self.client.get_table(view_id)
        # view_query is None when the table is not a view.
        match = re.search(r"idc_v(\d+)", view.view_query or "")
        if match is None:
            raise ValueError(
                f"No 'idc_v<N>' reference found in the query of view {view_id}"
            )
        return int(match.group(1))

    def extract_index_version(self, file_path):
        """Return the integer version that follows the ``get_idc_version`` marker.

        The version is the first ``v<N>`` token on the line right after the
        first line containing ``def get_idc_version(self):``.

        Raises:
            ValueError: if the marker is missing or is not followed by a line
                containing ``v<N>``.
        """
        print(f"Extracting index version from {file_path}...")
        with open(file_path, "r", encoding="utf-8") as file:
            for line in file:
                if _VERSION_MARKER in line:
                    # Default of "" covers the marker being the last line.
                    versions = re.findall(r"v(\d+)", next(file, ""))
                    if not versions:
                        raise ValueError(
                            f"No version found after '{_VERSION_MARKER}' "
                            f"in {file_path}"
                        )
                    return int(versions[0])
        raise ValueError(f"'{_VERSION_MARKER}' not found in {file_path}")

    def update_index_version(self, file_path, latest_idc_release_version):
        """Rewrite the ``v<N>`` token after each marker line in *file_path*.

        The new content is computed completely before the file is reopened
        for writing, so a failure cannot leave the file truncated.

        Raises:
            ValueError: if a marker line is the last line of the file.
        """
        print(f"Updating index version in {file_path}...")
        with open(file_path, "r", encoding="utf-8") as file:
            lines = file.readlines()
        for position, line in enumerate(lines):
            if _VERSION_MARKER not in line:
                continue
            if position + 1 >= len(lines):
                raise ValueError(
                    f"'{_VERSION_MARKER}' is the last line of {file_path}"
                )
            lines[position + 1] = re.sub(
                r"v(\d+)", f"v{latest_idc_release_version}", lines[position + 1]
            )
        with open(file_path, "w", encoding="utf-8") as file:
            file.writelines(lines)

    def update_sql_queries_folder(
        self, dir, current_index_version, latest_idc_release_version
    ):
        """Point every ``.sql`` file in *dir* at the latest IDC release.

        Each ``idc_v<current>`` reference is replaced by ``idc_v<latest>``.

        Returns:
            The rewritten text of the last file processed (in name order), or
            ``None`` when the folder contains no ``.sql`` file.
        """
        print(f"Updating SQL queries from {dir}...")
        # The negative lookahead stops e.g. idc_v1 from matching inside idc_v16.
        old_reference = re.compile(
            rf"idc_v{re.escape(str(current_index_version))}(?!\d)"
        )
        new_reference = f"idc_v{latest_idc_release_version}"
        modified_sql_query = None
        for file_path in self._sql_file_paths(dir):
            with open(file_path, "r", encoding="utf-8") as file:
                sql_query = file.read()
            # A callable replacement avoids any backslash interpretation.
            modified_sql_query = old_reference.sub(
                lambda _match: new_reference, sql_query
            )
            with open(file_path, "w", encoding="utf-8") as file:
                file.write(modified_sql_query)
        return modified_sql_query

    def execute_sql_query(self, file_path):
        """Run the query stored in *file_path* with the BigQuery client.

        Returns:
            A ``(df, csv_file_name)`` tuple: the query result as a DataFrame
            and ``<stem>.csv.zip``, where ``<stem>`` is the file's base name
            up to its first dot.
        """
        print(f"Executing SQL query from {file_path}...")
        with open(file_path, "r", encoding="utf-8") as file:
            sql_query = file.read()
        df = self.client.query(sql_query).to_dataframe()
        csv_file_name = f"{os.path.basename(file_path).split('.')[0]}.csv.zip"
        return df, csv_file_name

    def create_csv_zip_from_df(self, df, csv_file_name):
        """Write *df* as a zip-compressed CSV named *csv_file_name*."""
        print(f"Creating CSV zip file {csv_file_name}...")
        df.to_csv(csv_file_name, compression={'method': 'zip'}, escapechar="\\")

    def run_queries_folder(self, dir):
        """Execute every ``.sql`` file in *dir* and save each result as a CSV zip."""
        print(f"Running queries from {dir}...")
        for file_path in self._sql_file_paths(dir):
            df, csv_file_name = self.execute_sql_query(file_path)
            self.create_csv_zip_from_df(df, csv_file_name)

    def set_multiline_output(self, name, value):
        """Append *name*/*value* to the file named by ``GITHUB_OUTPUT``.

        The value is written in the ``name<<delimiter`` / value / ``delimiter``
        form. When ``GITHUB_OUTPUT`` is not set, the value is only printed.
        """
        print(f"Setting multiline output {name}...")
        output_path = os.environ.get("GITHUB_OUTPUT")
        if not output_path:
            print(f"GITHUB_OUTPUT is not set; {name}={value}")
            return
        # A random delimiter cannot be predicted from the host or the clock.
        delimiter = uuid.uuid4()
        with open(output_path, "a", encoding="utf-8") as fh:
            print(f"{name}<<{delimiter}", file=fh)
            print(value, file=fh)
            print(delimiter, file=fh)

    def run(self):
        """Report the latest IDC release and the current index version as outputs."""
        print("Running IDCIndexManager...")
        latest_idc_release_version = self.get_latest_idc_release_version(
            "bigquery-public-data.idc_current.dicom_all_view"
        )
        print(f"Latest IDC release version: {latest_idc_release_version}")
        current_index_version = self.extract_index_version("idc_index/index.py")
        print(f"Current index version: {current_index_version}")
        self.set_multiline_output("current_index_version", int(current_index_version))
        self.set_multiline_output(
            "latest_idc_release_version", int(latest_idc_release_version)
        )


if __name__ == "__main__":
    # GCP_PROJECT_ID overrides the placeholder project id when it is set.
    manager = IDCIndexManager(os.environ.get("GCP_PROJECT_ID", "gcp-project-id"))
    manager.run()
steps.pr_proposed_changes.outputs.queries == 'true' + shell: python + run: | + import sys + import os + sys.path.append(".github") + + from get_latest_index import IDCIndexManager + + project_id = os.environ['GCP_PROJECT_ID'] + manager = IDCIndexManager(project_id) + + manager.run_queries_folder("queries/") + env: + GCP_PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }} + + - name: If queries are not modified download latest index from github + if: steps.pr_proposed_changes.outputs.queries == 'false' || startsWith(github.ref, 'refs/tags/v') + run: + wget -q https://github.com/ImagingDataCommons/idc-index/releases/download/latest/idc_index.csv.zip + shell: bash + + - name: Setup testing + run: | + wget -q "https://github.com/peak/s5cmd/releases/download/v2.2.2/s5cmd_2.2.2_Linux-64bit.tar.gz"\ + && tar -xvzf "s5cmd_2.2.2_Linux-64bit.tar.gz"\ + && rm "s5cmd_2.2.2_Linux-64bit.tar.gz"\ + && mv s5cmd /usr/local/bin/s5cmd + shell: bash + + - name: If queries are modified by a pull request, change latest_index_url to use locally available csv + if: steps.pr_proposed_changes.outputs.queries == 'true' && github.event_name == 'pull_request' + run: | + import os + from pathlib import Path + home_dir = str(Path.home()) + with open('idc_index/index.py', 'r') as file: + filedata = file.read() + filedata = filedata.replace('https://github.com/ImagingDataCommons/idc-index/releases/download/latest/idc_index.csv.zip', os.path.join(home_dir, 'work/idc-index/idc-index/idc_index.csv.zip')) + with open('idc_index/index.py', 'w') as file: + file.write(filedata) + shell: python + + - name: Test package + run: | + python -m unittest -vv tests/idcindex.py + shell: bash + + - name: Create Tagged Release + id: create_tagged_release + if: (startsWith(github.ref, 'refs/tags/v')) && (github.event_name != 'pull_request') + #if: (startsWith(github.ref, 'refs/tags/v') && github.event_name != 'pull_request' && github.ref == 'refs/heads/main') + uses: ncipollo/release-action@v1 + with: + prerelease: false + 
body: 'Versioned idc-index' + artifacts: "*.zip" + + - name: Create latest Release + if: (github.event_name != 'pull_request' && github.ref == 'refs/heads/main') + #if: (github.event_name != 'pull_request' && github.ref == 'refs/heads/main') + uses: crowbarmaster/GH-Automatic-Releases@latest + with: + repo_token: "${{ secrets.GITHUB_TOKEN }}" + automatic_release_tag: "latest" + #generate_notes: false + body: "Latest idc-index" + prerelease: true + title: "Latest idc-index" + files: | + *.zip + + - name: Get version + uses: mtkennerly/dunamai-action@v1 + #if: (startsWith(github.ref, 'refs/tags/v') && github.event_name != 'pull_request' && github.ref == 'refs/heads/main') + with: + env-var: set_pypi_idc_index_version + args: --style semver + + - name: Echo soon to be released pypi version + #if: (startsWith(github.ref, 'refs/tags/v') && github.event_name != 'pull_request' && github.ref == 'refs/heads/main') + run: | + echo $set_pypi_idc_index_version + + - name: Build a source tarball + #if: (startsWith(github.ref, 'refs/tags/v') && github.event_name != 'pull_request' && github.ref == 'refs/heads/main') + run: python setup.py sdist + + - name: Publish distribution to PyPI + #if: (startsWith(github.ref, 'refs/tags/v') && github.event_name != 'pull_request' && github.ref == 'refs/heads/main') + if: startsWith(github.ref, 'refs/tags/v') + uses: pypa/gh-action-pypi-publish@release/v1 + with: + repository-url: https://test.pypi.org/legacy/ \ No newline at end of file diff --git a/.github/workflows/get-latest-index.yml b/.github/workflows/get-latest-index.yml new file mode 100644 index 0000000..7344522 --- /dev/null +++ b/.github/workflows/get-latest-index.yml @@ -0,0 +1,83 @@ +name: idc-index release manager +on: + workflow_dispatch: + schedule: + - cron: 0 12 */1 * * + +jobs: + update_idc_index: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: 3.11 + 
+ - name: Install dependencies + run: pip install requests==2.31.0 pandas==2.1.1 google-cloud-bigquery==3.12.0 pyarrow==13.0.0 db-dtypes==1.1.1 PyGithub==2.1.1 + + - name: Authorize Google Cloud + uses: google-github-actions/auth@v1 + with: + credentials_json: ${{ secrets.SERVICE_ACCOUNT_KEY }} + create_credentials_file: true + export_environment_variables: true + + - name: Run script to get the latest idc index + id: initialize_idc_manager_class + shell: python + run: | + import sys + import os + sys.path.append(".github") + + from get_latest_index import IDCIndexManager + + project_id = os.environ['GCP_PROJECT_ID'] + manager = IDCIndexManager(project_id) + + current_index_version = manager.extract_index_version("idc_index/index.py") + latest_idc_release_version = manager.get_latest_idc_release_version("bigquery-public-data.idc_current.dicom_all_view") + + if current_index_version < latest_idc_release_version: + manager.update_index_version("idc_index/index.py", latest_idc_release_version) + manager.update_sql_queries_folder("queries/", current_index_version, latest_idc_release_version) + manager.run_queries_folder("queries/") + + manager.set_multiline_output("current_index_version", int(current_index_version)) + manager.set_multiline_output("latest_idc_release_version", int(latest_idc_release_version)) + env: + GCP_PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }} + + - name: Create latest Release + id: create_release + if: ${{ steps.initialize_idc_manager_class.outputs.current_index_version != steps.initialize_idc_manager_class.outputs.latest_idc_release_version }} + uses: ncipollo/release-action@v1 + with: + tag: latest + prerelease: true + makeLatest: true + allowUpdates: true + body: 'Latest idc-index' + artifacts: "*.zip" + + - name: Create Pull Request + uses: peter-evans/create-pull-request@v5 + with: + title: Update to v${{ steps.initialize_idc_manager_class.outputs.latest_idc_release_version }} + body: Update sql queries and/or index.py to v${{ 
steps.initialize_idc_manager_class.outputs.latest_idc_release_version }} + base: main + branch: update-sql-queries-and-or-index + add-paths: | + queries/*.sql + idc_index/index.py + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + + + \ No newline at end of file diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml deleted file mode 100644 index 06cc0c5..0000000 --- a/.github/workflows/python-package.yml +++ /dev/null @@ -1,39 +0,0 @@ -name: Publish IDC Index Python distribution to PyPI - -on: - workflow_dispatch: - -jobs: - python-build-n-publish: - name: Build and publish Python distribution - runs-on: ubuntu-latest - environment: - name: pypi - url: https://pypi.org/p/idc-index - permissions: - id-token: write - steps: - - uses: actions/checkout@main - - - name: Initialize Python 3.11 - uses: actions/setup-python@v4 - with: - python-version: 3.11 - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install flake8 - - - name: Lint with flake8 - run: | - # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. - flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - - name: Build binary wheel and a source tarball - run: python setup.py sdist - - - name: Publish distribution to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/idc_index/index.py b/idc_index/index.py index bbf6e69..2245774 100644 --- a/idc_index/index.py +++ b/idc_index/index.py @@ -8,13 +8,14 @@ import zipfile import duckdb +latest_index_url= 'https://github.com/vkt1414/idc-index/releases/download/latest/idc_index.csv.zip' class IDCClient: def __init__(self): current_dir = os.path.dirname(os.path.abspath(__file__)) file_path = os.path.join(current_dir, 'idc_index.csv.zip') if not os.path.exists(file_path): - self.index=pd.read_csv('https://github.com/ImagingDataCommons/idc-index/releases/download/latest/idc_index.csv.zip', dtype=str, encoding='utf-8') + self.index=pd.read_csv(latest_index_url, dtype=str, encoding='utf-8') else: self.index = pd.read_csv(file_path, dtype=str, encoding='utf-8') self.index = self.index.astype(str).replace('nan', '') diff --git a/setup.py b/setup.py index 49d1392..11f5b54 100644 --- a/setup.py +++ b/setup.py @@ -70,8 +70,8 @@ def run(self): except Exception as e: logging.error('Failed to download s5cmd:', e) setup( - name='idc_index', - version=package_version, + name='test-index-vamsi', + version=os.environ['set_pypi_idc_index_version'], packages=find_packages(), include_package_data=True, install_requires=['pandas', 'requests', 'duckdb'],