Skip to content

Commit

Permalink
create github actions for managing prs and commits
Browse files Browse the repository at this point in the history
  • Loading branch information
vkt1414 committed Dec 1, 2023
1 parent 660141c commit 480261b
Show file tree
Hide file tree
Showing 7 changed files with 328 additions and 42 deletions.
Empty file added .github/__init__.py
Empty file.
95 changes: 95 additions & 0 deletions .github/get_latest_index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import os
import re
import requests
import sys
import uuid
from google.cloud import bigquery

class IDCIndexManager:
    """Keep the package's pinned IDC version and query results up to date.

    The manager can read the IDC release number out of a BigQuery view
    definition, read and rewrite the ``v<N>`` version pinned in a Python
    source file and in ``.sql`` query files, run those queries and store the
    results as zipped CSV files, and publish values as GitHub Actions step
    outputs.
    """

    # Source line that directly precedes the line holding the pinned "v<N>".
    _VERSION_MARKER = "def get_idc_version(self):"

    def __init__(self, project_id):
        """Create a BigQuery client bound to ``project_id``."""
        print("Initializing IDCIndexManager...")
        self.project_id = project_id
        self.client = bigquery.Client(project=project_id)

    def get_latest_idc_release_version(self, view_id):
        """Return the release number referenced by the view ``view_id``.

        The number is taken from the first ``idc_v<N>`` occurrence in the
        view's SQL definition.

        Raises:
            ValueError: if the view definition contains no ``idc_v<N>``.
        """
        print("Getting latest IDC release version...")
        view = self.client.get_table(view_id)
        # ``view_query`` may be empty for a non-view table; treat it as "".
        match = re.search(r"idc_v(\d+)", view.view_query or "")
        if match is None:
            raise ValueError(
                f"No 'idc_v<N>' reference found in the definition of {view_id}"
            )
        return int(match.group(1))

    def extract_index_version(self, file_path):
        """Return the integer version pinned in ``file_path``.

        The version is the first ``v<N>`` found on the line that follows the
        first ``def get_idc_version(self):`` line.

        Raises:
            ValueError: if the marker line is missing or the line after it
                holds no ``v<N>`` token.
        """
        print(f"Extracting index version from {file_path}...")
        with open(file_path, "r", encoding="utf-8") as file:
            for line in file:
                if self._VERSION_MARKER in line:
                    # Default of "" avoids StopIteration when the marker is
                    # the very last line of the file.
                    versions = re.findall(r"v(\d+)", next(file, ""))
                    if versions:
                        return int(versions[0])
                    break
        raise ValueError(f"No pinned 'v<N>' version found in {file_path}")

    def update_index_version(self, file_path, latest_idc_release_version):
        """Rewrite the version pinned in ``file_path``.

        Every ``v<N>`` token on the line following a
        ``def get_idc_version(self):`` line is replaced with
        ``v<latest_idc_release_version>``. All other lines are kept as read.
        """
        print(f"Updating index version in {file_path}...")
        with open(file_path, "r", encoding="utf-8") as file:
            lines = file.readlines()
        # Compute the new content BEFORE reopening for writing, so a failure
        # can never leave the file truncated. Iterating over ``lines[:-1]``
        # guarantees ``lines[i + 1]`` exists.
        for i, line in enumerate(lines[:-1]):
            if self._VERSION_MARKER in line:
                lines[i + 1] = re.sub(
                    r"v(\d+)", f"v{latest_idc_release_version}", lines[i + 1]
                )
        with open(file_path, "w", encoding="utf-8") as file:
            file.writelines(lines)

    def update_sql_queries_folder(
        self, dir, current_index_version, latest_idc_release_version
    ):
        """Point every ``.sql`` file in ``dir`` at the new IDC release.

        Each occurrence of ``idc_v<current_index_version>`` is replaced with
        ``idc_v<latest_idc_release_version>`` and the file is rewritten in
        place. Files without the ``.sql`` suffix are left untouched.

        Returns:
            The rewritten text of the last ``.sql`` file processed, or
            ``None`` when the folder contains no ``.sql`` file.
        """
        print(f"Updating SQL queries from {dir}...")
        old_tag = f"idc_v{current_index_version}"
        new_tag = f"idc_v{latest_idc_release_version}"
        modified_sql_query = None
        for file_name in os.listdir(dir):
            if not file_name.endswith(".sql"):
                continue
            file_path = os.path.join(dir, file_name)
            with open(file_path, "r", encoding="utf-8") as file:
                sql_query = file.read()
            modified_sql_query = sql_query.replace(old_tag, new_tag)
            with open(file_path, "w", encoding="utf-8") as file:
                file.write(modified_sql_query)
        return modified_sql_query

    def execute_sql_query(self, file_path):
        """Run the query stored in ``file_path`` on BigQuery.

        Returns:
            A ``(df, csv_file_name)`` tuple: the query result as a DataFrame
            and the output name ``<stem>.csv.zip``, where ``<stem>`` is the
            file's base name up to its first dot.
        """
        print(f"Executing SQL query from {file_path}...")
        with open(file_path, "r", encoding="utf-8") as file:
            sql_query = file.read()
        df = self.client.query(sql_query).to_dataframe()
        csv_file_name = f"{os.path.basename(file_path).split('.')[0]}.csv.zip"
        return df, csv_file_name

    def create_csv_zip_from_df(self, df, csv_file_name):
        """Write ``df`` as a zip-compressed CSV named ``csv_file_name``."""
        print(f"Creating CSV zip file {csv_file_name}...")
        df.to_csv(csv_file_name, compression={'method': 'zip'}, escapechar="\\")

    def run_queries_folder(self, dir):
        """Execute every ``.sql`` file in ``dir`` and save each result.

        Each result is written to ``<stem>.csv.zip`` relative to the current
        working directory.
        """
        print(f"Running queries from {dir}...")
        for file_name in os.listdir(dir):
            if file_name.endswith(".sql"):
                file_path = os.path.join(dir, file_name)
                df, csv_file_name = self.execute_sql_query(file_path)
                self.create_csv_zip_from_df(df, csv_file_name)

    def set_multiline_output(self, name, value):
        """Append ``name``/``value`` to the file named by ``GITHUB_OUTPUT``.

        The value is written in the ``name<<DELIMITER`` heredoc form, with a
        freshly generated UUID as the delimiter.

        Raises:
            KeyError: if the ``GITHUB_OUTPUT`` environment variable is unset.
        """
        print(f"Setting multiline output {name}...")
        with open(os.environ["GITHUB_OUTPUT"], "a") as fh:
            delimiter = uuid.uuid1()
            print(f"{name}<<{delimiter}", file=fh)
            print(value, file=fh)
            print(delimiter, file=fh)

    def run(self):
        """Look up both versions, print them, and publish them as outputs."""
        print("Running IDCIndexManager...")
        latest_idc_release_version = self.get_latest_idc_release_version(
            "bigquery-public-data.idc_current.dicom_all_view"
        )
        print(f"Latest IDC release version: {latest_idc_release_version}")
        current_index_version = self.extract_index_version("idc_index/index.py")
        print(f"Current index version: {current_index_version}")
        self.set_multiline_output(
            "current_index_version", int(current_index_version)
        )
        self.set_multiline_output(
            "latest_idc_release_version", int(latest_idc_release_version)
        )

if __name__ == "__main__":
    # Script entry point. The project ID is read from the GCP_PROJECT_ID
    # environment variable; when it is unset or empty, the previous
    # hard-coded placeholder is used so existing invocations behave the same.
    manager = IDCIndexManager(os.environ.get("GCP_PROJECT_ID") or "gcp-project-id")
    manager.run()
146 changes: 146 additions & 0 deletions .github/workflows/commit-pr-release-manager.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
# Workflow triggered manually, on every push and on every pull request.
# Its single job obtains the index CSV (regenerated from the queries or
# downloaded), tests the package, creates GitHub releases and builds/publishes
# a source distribution.
name: commit-pr-idc-index-release-manager
on:
workflow_dispatch:
push:
pull_request:

jobs:
update_idc_index:
runs-on: ubuntu-latest
environment:
name: pypi
url: https://pypi.org/p/idc-index
permissions:
# NOTE(review): id-token write is presumably for the PyPI publish step at
# the end of the job - confirm.
id-token: write
contents: write
pull-requests: read

steps:
# fetch-depth 0 requests the full git history rather than a shallow clone.
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: 3.11

- name: Install dependencies
run: |
pip install requests==2.31.0 pandas==2.1.1 google-cloud-bigquery==3.12.0 \
pyarrow==13.0.0 db-dtypes==1.1.1 PyGithub==2.1.1 flake8==6.1.0 \
duckdb==0.9.2
shell: bash

# Authenticates with the service-account key stored in the repository secrets.
- name: Authorize Google Cloud
uses: google-github-actions/auth@v1
with:
credentials_json: ${{ secrets.SERVICE_ACCOUNT_KEY }}
create_credentials_file: true
export_environment_variables: true

# The "queries" output of this step is compared against 'true'/'false' by the
# conditions of the following steps.
- name: Check if queries folder changed in the latest commit or pull request
uses: dorny/paths-filter@v2
id: pr_proposed_changes
with:
filters: |
queries:
- 'queries/**'
# Runs every .sql file under queries/ through IDCIndexManager.run_queries_folder.
- name: If queries are modified, run them with bigquery
id: initialize_idc_manager_class
if: steps.pr_proposed_changes.outputs.queries == 'true'
shell: python
run: |
import sys
import os
sys.path.append(".github")
from get_latest_index import IDCIndexManager
project_id = os.environ['GCP_PROJECT_ID']
manager = IDCIndexManager(project_id)
manager.run_queries_folder("queries/")
env:
GCP_PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }}

# Otherwise (or on a v* tag) fetch the index published under the "latest" release.
- name: If queries are not modified download latest index from github
if: steps.pr_proposed_changes.outputs.queries == 'false' || startsWith(github.ref, 'refs/tags/v')
run:
wget -q https://github.com/ImagingDataCommons/idc-index/releases/download/latest/idc_index.csv.zip
shell: bash

# Downloads the s5cmd v2.2.2 Linux binary and moves it to /usr/local/bin.
- name: Setup testing
run: |
wget -q "https://github.com/peak/s5cmd/releases/download/v2.2.2/s5cmd_2.2.2_Linux-64bit.tar.gz"\
&& tar -xvzf "s5cmd_2.2.2_Linux-64bit.tar.gz"\
&& rm "s5cmd_2.2.2_Linux-64bit.tar.gz"\
&& mv s5cmd /usr/local/bin/s5cmd
shell: bash

# Rewrites idc_index/index.py so the release URL is replaced by a path under
# the runner's home directory.
# NOTE(review): the replacement only takes effect if index.py contains exactly
# the ImagingDataCommons URL below - verify against the current index.py.
- name: If queries are modified by a pull request, change latest_index_url to use locally available csv
if: steps.pr_proposed_changes.outputs.queries == 'true' && github.event_name == 'pull_request'
run: |
import os
from pathlib import Path
home_dir = str(Path.home())
with open('idc_index/index.py', 'r') as file:
filedata = file.read()
filedata = filedata.replace('https://github.com/ImagingDataCommons/idc-index/releases/download/latest/idc_index.csv.zip', os.path.join(home_dir, 'work/idc-index/idc-index/idc_index.csv.zip'))
with open('idc_index/index.py', 'w') as file:
file.write(filedata)
shell: python

- name: Test package
run: |
python -m unittest -vv tests/idcindex.py
shell: bash

# Only for refs starting with refs/tags/v and never for pull_request events;
# uploads every *.zip in the workspace as a release artifact.
- name: Create Tagged Release
id: create_tagged_release
if: (startsWith(github.ref, 'refs/tags/v')) && (github.event_name != 'pull_request')
#if: (startsWith(github.ref, 'refs/tags/v') && github.event_name != 'pull_request' && github.ref == 'refs/heads/main')
uses: ncipollo/release-action@v1
with:
prerelease: false
body: 'Versioned idc-index'
artifacts: "*.zip"

# Only for non-pull_request events on refs/heads/main; publishes the *.zip
# files under the "latest" tag as a prerelease.
- name: Create latest Release
if: (github.event_name != 'pull_request' && github.ref == 'refs/heads/main')
#if: (github.event_name != 'pull_request' && github.ref == 'refs/heads/main')
uses: crowbarmaster/GH-Automatic-Releases@latest
with:
repo_token: "${{ secrets.GITHUB_TOKEN }}"
automatic_release_tag: "latest"
#generate_notes: false
body: "Latest idc-index"
prerelease: true
title: "Latest idc-index"
files: |
*.zip
# Stores the computed version in the set_pypi_idc_index_version environment
# variable, which the next step echoes.
- name: Get version
uses: mtkennerly/dunamai-action@v1
#if: (startsWith(github.ref, 'refs/tags/v') && github.event_name != 'pull_request' && github.ref == 'refs/heads/main')
with:
env-var: set_pypi_idc_index_version
args: --style semver

- name: Echo soon to be released pypi version
#if: (startsWith(github.ref, 'refs/tags/v') && github.event_name != 'pull_request' && github.ref == 'refs/heads/main')
run: |
echo $set_pypi_idc_index_version
- name: Build a source tarball
#if: (startsWith(github.ref, 'refs/tags/v') && github.event_name != 'pull_request' && github.ref == 'refs/heads/main')
run: python setup.py sdist

# Runs only for refs starting with refs/tags/v. repository-url points at
# test.pypi.org, so the upload targets TestPyPI.
- name: Publish distribution to PyPI
#if: (startsWith(github.ref, 'refs/tags/v') && github.event_name != 'pull_request' && github.ref == 'refs/heads/main')
if: startsWith(github.ref, 'refs/tags/v')
uses: pypa/gh-action-pypi-publish@release/v1
with:
repository-url: https://test.pypi.org/legacy/
83 changes: 83 additions & 0 deletions .github/workflows/get-latest-index.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# Scheduled/manual workflow that compares the IDC version pinned in the
# package with the one referenced by the public BigQuery view, regenerates the
# index when the pinned one is older, publishes it as the "latest" release
# and opens a pull request with the updated sources.
name: idc-index release manager
on:
workflow_dispatch:
schedule:
# Minute 0, hour 12, every day of the month (step of 1), every month.
- cron: 0 12 */1 * *

jobs:
update_idc_index:
runs-on: ubuntu-latest

steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: 3.11

- name: Install dependencies
run: pip install requests==2.31.0 pandas==2.1.1 google-cloud-bigquery==3.12.0 pyarrow==13.0.0 db-dtypes==1.1.1 PyGithub==2.1.1

# Authenticates with the service-account key stored in the repository secrets.
- name: Authorize Google Cloud
uses: google-github-actions/auth@v1
with:
credentials_json: ${{ secrets.SERVICE_ACCOUNT_KEY }}
create_credentials_file: true
export_environment_variables: true

# Uses IDCIndexManager from .github/get_latest_index.py. When the pinned
# version is lower than the latest release, index.py and the .sql files are
# rewritten and the queries are re-run. The script also writes the
# current_index_version and latest_idc_release_version step outputs.
- name: Run script to get the latest idc index
id: initialize_idc_manager_class
shell: python
run: |
import sys
import os
sys.path.append(".github")
from get_latest_index import IDCIndexManager
project_id = os.environ['GCP_PROJECT_ID']
manager = IDCIndexManager(project_id)
current_index_version = manager.extract_index_version("idc_index/index.py")
latest_idc_release_version = manager.get_latest_idc_release_version("bigquery-public-data.idc_current.dicom_all_view")
if current_index_version < latest_idc_release_version:
manager.update_index_version("idc_index/index.py", latest_idc_release_version)
manager.update_sql_queries_folder("queries/", current_index_version, latest_idc_release_version)
manager.run_queries_folder("queries/")
manager.set_multiline_output("current_index_version", int(current_index_version))
manager.set_multiline_output("latest_idc_release_version", int(latest_idc_release_version))
env:
GCP_PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }}

# Runs only when the two version outputs of the previous step differ;
# uploads every *.zip in the workspace under the "latest" tag.
- name: Create latest Release
id: create_release
if: ${{ steps.initialize_idc_manager_class.outputs.current_index_version != steps.initialize_idc_manager_class.outputs.latest_idc_release_version }}
uses: ncipollo/release-action@v1
with:
tag: latest
prerelease: true
makeLatest: true
allowUpdates: true
body: 'Latest idc-index'
artifacts: "*.zip"

# This step has no "if" condition, so it runs on every execution of the job.
# Only queries/*.sql and idc_index/index.py are included via add-paths.
# NOTE(review): presumably the action does nothing when those paths are
# unchanged - confirm against the action's documentation.
- name: Create Pull Request
uses: peter-evans/create-pull-request@v5
with:
title: Update to v${{ steps.initialize_idc_manager_class.outputs.latest_idc_release_version }}
body: Update sql queries and/or index.py to v${{ steps.initialize_idc_manager_class.outputs.latest_idc_release_version }}
base: main
branch: update-sql-queries-and-or-index
add-paths: |
queries/*.sql
idc_index/index.py
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}




39 changes: 0 additions & 39 deletions .github/workflows/python-package.yml

This file was deleted.

3 changes: 2 additions & 1 deletion idc_index/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,14 @@
import zipfile
import duckdb

latest_index_url= 'https://github.com/vkt1414/idc-index/releases/download/latest/idc_index.csv.zip'

class IDCClient:
def __init__(self):
current_dir = os.path.dirname(os.path.abspath(__file__))
file_path = os.path.join(current_dir, 'idc_index.csv.zip')
if not os.path.exists(file_path):
self.index=pd.read_csv('https://github.com/ImagingDataCommons/idc-index/releases/download/latest/idc_index.csv.zip', dtype=str, encoding='utf-8')
self.index=pd.read_csv(latest_index_url, dtype=str, encoding='utf-8')
else:
self.index = pd.read_csv(file_path, dtype=str, encoding='utf-8')
self.index = self.index.astype(str).replace('nan', '')
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ def run(self):
except Exception as e:
logging.error('Failed to download s5cmd:', e)
setup(
name='idc_index',
version=package_version,
name='test-index-vamsi',
version=os.environ['set_pypi_idc_index_version'],
packages=find_packages(),
include_package_data=True,
install_requires=['pandas', 'requests', 'duckdb'],
Expand Down

0 comments on commit 480261b

Please sign in to comment.