Skip to content

Commit

Permalink
feat: initial project structure and implementation (#14)
Browse files Browse the repository at this point in the history
  • Loading branch information
Karanjot786 authored Jul 24, 2024
1 parent 2c0851f commit d2659e6
Show file tree
Hide file tree
Showing 13 changed files with 965 additions and 195 deletions.
16 changes: 6 additions & 10 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,24 +25,20 @@ jobs:
- name: Lint with Ruff
run: |
poetry run ruff check src/ tests/
poetry run ruff check crategen/
- name: Type check with Mypy
run: |
poetry run mypy src/
poetry run mypy crategen/
- name: Run security checks with Bandit
run: |
poetry run bandit -r src/
poetry run bandit -r crategen/
- name: Install test dependencies
run: |
poetry add pytest pytest-cov pytest-mock
- name: Run tests
run: |
poetry run pytest --cov=src
- name: Generate documentation
run: |
poetry run sphinx-build -b html docs/ docs/_build/html
# - name: Run tests
# run: |
# poetry run pytest --cov=crategen
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# WRROC <> GA4GH API Conversion Tool
# CrateGen
[![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](./LICENSE)
[![Python 3.11](https://img.shields.io/badge/python-3.11-blue.svg)](https://www.python.org/downloads/release/python-311/)

## Synopsis
This project aims to create a library that translates between GA4GH Cloud API schemas and RO-Crate profiles to facilitate reproducibility in scientific research. The tool will enable the packaging and sharing of task/tool and workflow runs, allowing other researchers to reproduce them or use parts of them.

## Description
The WRROC <> GA4GH API Conversion Tool is designed to compile RO-Crates from tool and workflow executions in GA4GH Cloud API-powered cloud infrastructures. This will allow easy re-execution of these computational analyses, thus improving the reporting and reproducibility of computational research.
The CrateGen Tool is designed to compile RO-Crates from tool and workflow executions in GA4GH Cloud API-powered cloud infrastructures. This will allow easy re-execution of these computational analyses, thus improving the reporting and reproducibility of computational research. Additionally, it will allow the creation of WES/TES calls from existing WRROCs, making the conversion bidirectional.

## Installation
To install the necessary dependencies, run the following command:
Expand Down
Empty file added crategen/__init__.py
Empty file.
30 changes: 30 additions & 0 deletions crategen/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import click
import json
from crategen.converter_manager import ConverterManager

# NOTE: the parameter names (including ``input``, which shadows the builtin)
# are dictated by the click option names and must stay as they are. The
# docstring is also kept verbatim because click shows it as the --help text.
@click.command()
@click.option('--input', prompt='Input file', help='Path to the input JSON file.')
@click.option('--output', prompt='Output file', help='Path to the output JSON file.')
@click.option('--conversion-type', prompt='Conversion type', type=click.Choice(['tes-to-wrroc', 'wes-to-wrroc']), help='Type of conversion to perform.')
def cli(input, output, conversion_type):
    """
    Command Line Interface for converting TES/WES to WRROC.
    """
    manager = ConverterManager()

    # Map every accepted --conversion-type value to the manager method that
    # handles it. An unknown value fails here with a KeyError instead of
    # leaving ``result`` unbound further down.
    converters = {
        'tes-to-wrroc': manager.convert_tes_to_wrroc,
        'wes-to-wrroc': manager.convert_wes_to_wrroc,
    }
    convert = converters[conversion_type]

    # Load input data from JSON file. JSON is UTF-8 by specification, so do
    # not rely on the platform's locale encoding.
    with open(input, 'r', encoding='utf-8') as input_file:
        data = json.load(input_file)

    # Perform the conversion based on the specified type
    result = convert(data)

    # Save the result to the output JSON file
    with open(output, 'w', encoding='utf-8') as output_file:
        json.dump(result, output_file, indent=4)


if __name__ == '__main__':
    cli()
13 changes: 13 additions & 0 deletions crategen/converter_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from .converters.tes_converter import TESConverter
from .converters.wes_converter import WESConverter

class ConverterManager:
    """Single entry point that forwards conversion requests to the TES and WES converters."""

    def __init__(self):
        # One converter instance per supported input format.
        self.tes_converter, self.wes_converter = TESConverter(), WESConverter()

    def convert_tes_to_wrroc(self, tes_data):
        """Pass ``tes_data`` to the TES converter and return what it produces."""
        converter = self.tes_converter
        return converter.convert_to_wrroc(tes_data)

    def convert_wes_to_wrroc(self, wes_data):
        """Pass ``wes_data`` to the WES converter and return what it produces."""
        converter = self.wes_converter
        return converter.convert_to_wrroc(wes_data)
Empty file added crategen/converters/__init__.py
Empty file.
10 changes: 10 additions & 0 deletions crategen/converters/abstract_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from abc import ABC, abstractmethod

class AbstractConverter(ABC):
    """Interface every converter implements: one method per conversion direction."""

    @abstractmethod
    def convert_to_wrroc(self, data):
        """Translate ``data`` from the converter's own format into WRROC."""

    @abstractmethod
    def convert_from_wrroc(self, wrroc_data):
        """Translate WRROC ``wrroc_data`` back into the converter's own format."""
52 changes: 52 additions & 0 deletions crategen/converters/tes_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from .abstract_converter import AbstractConverter
from .utils import convert_to_iso8601

class TESConverter(AbstractConverter):
    """Converter between TES task dictionaries and WRROC dictionaries."""

    def convert_to_wrroc(self, tes_data):
        """
        Build a WRROC dictionary from a TES task dictionary.

        Missing keys fall back to empty values instead of raising.

        Args:
            tes_data (dict): TES task data; the keys read are ``id``, ``name``,
                ``description``, ``executors``, ``inputs``, ``outputs``,
                ``creation_time`` and ``logs``.

        Returns:
            dict: WRROC data with the keys ``@id``, ``name``, ``description``,
            ``instrument``, ``object``, ``result``, ``startTime`` and ``endTime``.
        """
        # Validate and extract data with defaults
        task_id = tes_data.get("id", "")
        name = tes_data.get("name", "")
        description = tes_data.get("description", "")
        executors = tes_data.get("executors", [{}])
        # A JSON null is treated like a missing list so the comprehensions
        # below do not fail on None.
        inputs = tes_data.get("inputs") or []
        outputs = tes_data.get("outputs") or []
        creation_time = tes_data.get("creation_time", "")
        # "logs" can be absent, null or an empty list; fall back to a single
        # empty entry so indexing the first log never raises.
        logs = tes_data.get("logs") or [{}]
        end_time = logs[0].get("end_time", "")

        # Convert to WRROC
        wrroc_data = {
            "@id": task_id,
            "name": name,
            "description": description,
            # Only the first executor's image is carried over.
            "instrument": executors[0].get("image", None) if executors else None,
            "object": [{"@id": item.get("url", ""), "name": item.get("path", "")} for item in inputs],
            "result": [{"@id": item.get("url", ""), "name": item.get("path", "")} for item in outputs],
            "startTime": convert_to_iso8601(creation_time),
            "endTime": convert_to_iso8601(end_time),
        }
        return wrroc_data

    def convert_from_wrroc(self, wrroc_data):
        """
        Build a TES task dictionary from a WRROC dictionary.

        Args:
            wrroc_data (dict): WRROC data; the keys read are ``@id``, ``name``,
                ``description``, ``instrument``, ``object``, ``result``,
                ``startTime`` and ``endTime``.

        Returns:
            dict: TES data with the keys ``id``, ``name``, ``description``,
            ``executors``, ``inputs``, ``outputs``, ``creation_time`` and ``logs``.
        """
        # Validate and extract data with defaults
        task_id = wrroc_data.get("@id", "")
        name = wrroc_data.get("name", "")
        description = wrroc_data.get("description", "")
        instrument = wrroc_data.get("instrument", "")
        object_data = wrroc_data.get("object", [])
        result_data = wrroc_data.get("result", [])
        start_time = wrroc_data.get("startTime", "")
        end_time = wrroc_data.get("endTime", "")

        # Convert from WRROC to TES
        tes_data = {
            "id": task_id,
            "name": name,
            "description": description,
            "executors": [{"image": instrument}],
            "inputs": [{"url": obj.get("@id", ""), "path": obj.get("name", "")} for obj in object_data],
            "outputs": [{"url": res.get("@id", ""), "path": res.get("name", "")} for res in result_data],
            # Timestamps are passed through unchanged.
            "creation_time": start_time,
            "logs": [{"end_time": end_time}],  # mirrors where convert_to_wrroc reads the end time
        }
        return tes_data
29 changes: 29 additions & 0 deletions crategen/converters/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import datetime

def convert_to_iso8601(timestamp):
    """
    Convert a given timestamp to ISO 8601 format in UTC with a trailing "Z".

    Handles multiple formats including RFC 3339, ISO 8601 with and without fractional seconds.
    Timestamps that carry a UTC offset are normalised to UTC first, so the
    "Z" suffix is always accurate.

    Args:
        timestamp (str): The timestamp to be converted.

    Returns:
        str: The converted timestamp in ISO 8601 format, or None if the input
        is empty or matches none of the supported formats.
    """
    if not timestamp:
        return None

    # Supported formats, tried in order
    formats = (
        "%Y-%m-%dT%H:%M:%S.%fZ",   # RFC 3339 with fractional seconds
        "%Y-%m-%dT%H:%M:%SZ",      # RFC 3339 without fractional seconds
        "%Y-%m-%dT%H:%M:%S%z",     # ISO 8601 with timezone
        "%Y-%m-%dT%H:%M:%S.%f%z",  # ISO 8601 with fractional seconds and timezone
    )
    for fmt in formats:
        try:
            parsed = datetime.datetime.strptime(timestamp, fmt)
        except ValueError:
            continue
        if parsed.tzinfo is not None:
            # %z yields an aware datetime whose isoformat() already ends in an
            # offset; convert to UTC and drop tzinfo so that appending "Z"
            # gives a valid timestamp instead of e.g. "...+02:00Z".
            parsed = parsed.astimezone(datetime.timezone.utc).replace(tzinfo=None)
        return parsed.isoformat() + "Z"

    # No supported format matched
    return None
46 changes: 46 additions & 0 deletions crategen/converters/wes_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from .abstract_converter import AbstractConverter
from .utils import convert_to_iso8601

class WESConverter(AbstractConverter):
    """Converter between WES run dictionaries and WRROC dictionaries."""

    def convert_to_wrroc(self, wes_data):
        """
        Build a WRROC dictionary from a WES run dictionary.

        Missing keys fall back to empty values instead of raising.

        Args:
            wes_data (dict): WES run data; the keys read are ``run_id``,
                ``run_log`` (``name``, ``start_time``, ``end_time``),
                ``state`` and ``outputs``.

        Returns:
            dict: WRROC data with the keys ``@id``, ``name``, ``status``,
            ``startTime``, ``endTime`` and ``result``.
        """
        # Validate and extract data with defaults
        run_id = wes_data.get("run_id", "")
        # Look the run log up once; a JSON null is treated like a missing log.
        run_log = wes_data.get("run_log") or {}
        name = run_log.get("name", "")
        state = wes_data.get("state", "")
        start_time = run_log.get("start_time", "")
        end_time = run_log.get("end_time", "")
        # The comprehension below iterates a list of dicts (the same shape
        # convert_from_wrroc emits), so the fallback is a list.
        # NOTE(review): a non-empty mapping-shaped "outputs" is still not
        # supported here and fails on ``.get`` - confirm the expected schema.
        outputs = wes_data.get("outputs") or []

        # Convert to WRROC
        wrroc_data = {
            "@id": run_id,
            "name": name,
            "status": state,
            "startTime": convert_to_iso8601(start_time),
            "endTime": convert_to_iso8601(end_time),
            "result": [{"@id": output.get("location", ""), "name": output.get("name", "")} for output in outputs],
        }
        return wrroc_data

    def convert_from_wrroc(self, wrroc_data):
        """
        Build a WES run dictionary from a WRROC dictionary.

        Args:
            wrroc_data (dict): WRROC data; the keys read are ``@id``, ``name``,
                ``startTime``, ``endTime``, ``status`` and ``result``.

        Returns:
            dict: WES data with the keys ``run_id``, ``run_log``, ``state``
            and ``outputs``.
        """
        # Validate and extract data with defaults
        run_id = wrroc_data.get("@id", "")
        name = wrroc_data.get("name", "")
        start_time = wrroc_data.get("startTime", "")
        end_time = wrroc_data.get("endTime", "")
        state = wrroc_data.get("status", "")
        result_data = wrroc_data.get("result", [])

        # Convert from WRROC to WES
        wes_data = {
            "run_id": run_id,
            "run_log": {
                "name": name,
                # Timestamps are passed through unchanged.
                "start_time": start_time,
                "end_time": end_time,
            },
            "state": state,
            "outputs": [{"location": res.get("@id", ""), "name": res.get("name", "")} for res in result_data],
        }
        return wes_data
Empty file added crategen/py.typed
Empty file.
Loading

0 comments on commit d2659e6

Please sign in to comment.