From db0fc0dfd38a14543240820e0bfd27a1d66cbc88 Mon Sep 17 00:00:00 2001 From: salihuDickson Date: Wed, 21 Aug 2024 22:52:28 +0100 Subject: [PATCH 1/6] improve tes models --- crategen/converters/tes_converter.py | 43 +++-- crategen/converters/utils.py | 29 ---- crategen/models/__init__.py | 0 crategen/models/tes_models.py | 250 +++++++++++++++++++++++++++ 4 files changed, 278 insertions(+), 44 deletions(-) delete mode 100644 crategen/converters/utils.py create mode 100644 crategen/models/__init__.py create mode 100644 crategen/models/tes_models.py diff --git a/crategen/converters/tes_converter.py b/crategen/converters/tes_converter.py index ec2eb3b..b6e9c15 100644 --- a/crategen/converters/tes_converter.py +++ b/crategen/converters/tes_converter.py @@ -1,8 +1,9 @@ from .abstract_converter import AbstractConverter -from .utils import convert_to_iso8601 -from ..models import TESData, WRROCDataTES +from ..models.tes_models import TESData +from ..models.wrroc_models import WRROCDataTES from pydantic import ValidationError + class TESConverter(AbstractConverter): def convert_to_wrroc(self, tes_data): @@ -13,25 +14,37 @@ def convert_to_wrroc(self, tes_data): raise ValueError(f"Invalid TES data: {e}") # Extract validated data - id = validated_tes_data.id - name = validated_tes_data.name - description = validated_tes_data.description - executors = validated_tes_data.executors - inputs = validated_tes_data.inputs - outputs = validated_tes_data.outputs - creation_time = validated_tes_data.creation_time - end_time = validated_tes_data.logs[0].end_time if validated_tes_data.logs else "" + ( + id, + name, + description, + creation_time, + state, + inputs, + outputs, + executors, + resources, + volumes, + logs, + tags, + ) = validated_tes_data.dict().values() + end_time = validated_tes_data.logs[0].end_time # Convert to WRROC wrroc_data = { "@id": id, "name": name, "description": description, - "instrument": executors[0].image if executors else None, - "object": [{"@id": input.url, "name": input.path} for input in inputs], - "result": [{"@id": output.url, "name": output.path} for output in outputs], - "startTime": convert_to_iso8601(creation_time), - "endTime": convert_to_iso8601(end_time), + "instrument": executors[0]["image"] if executors else None, + "object": [ + {"@id": input["url"], "name": input["path"], "type": input["type"]} + for input in inputs + ], + "result": [ + {"@id": output["url"], "name": output["path"]} for output in outputs + ], + "startTime": creation_time, + "endTime": end_time, } return wrroc_data diff --git a/crategen/converters/utils.py b/crategen/converters/utils.py deleted file mode 100644 index 2116c7f..0000000 --- a/crategen/converters/utils.py +++ /dev/null @@ -1,29 +0,0 @@ -import datetime - -def convert_to_iso8601(timestamp): - """ - Convert a given timestamp to ISO 8601 format. - Handles multiple formats including RFC 3339, ISO 8601 with and without fractional seconds. - - Args: - timestamp (str): The timestamp to be converted. - - Returns: - str: The converted timestamp in ISO 8601 format, or None if the input format is incorrect. - """ - if timestamp: - # List of supported formats - formats = [ - "%Y-%m-%dT%H:%M:%S.%fZ", # RFC 3339 with fractional seconds - "%Y-%m-%dT%H:%M:%SZ", # RFC 3339 without fractional seconds - "%Y-%m-%dT%H:%M:%S%z", # ISO 8601 with timezone - "%Y-%m-%dT%H:%M:%S.%f%z", # ISO 8601 with fractional seconds and timezone - ] - for fmt in formats: - try: - return datetime.datetime.strptime(timestamp, fmt).isoformat() + "Z" - except ValueError: - continue - # Handle incorrect format or other issues - return None - return None diff --git a/crategen/models/__init__.py b/crategen/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/crategen/models/tes_models.py b/crategen/models/tes_models.py new file mode 100644 index 0000000..9e459e2 --- /dev/null +++ b/crategen/models/tes_models.py @@ -0,0 +1,250 @@ +from pydantic import BaseModel, AnyUrl, root_validator, validator +from ..utils import convert_to_rfc3339_format +from datetime import datetime +from typing import Optional +from enum import Enum +import os + +class TESFileType(str, Enum): + FILE = "FILE" + DIRECTORY = "DIRECTORY" + +class TESState(str, Enum): + UNKNOWN = 'UNKNOWN' + QUEUED = 'QUEUED' + INITIALIZING = 'INITIALIZING' + RUNNING = 'RUNNING' + PAUSED = 'PAUSED' + COMPLETE = 'COMPLETE' + EXECUTOR_ERROR = 'EXECUTOR_ERROR' + SYSTEM_ERROR = 'SYSTEM_ERROR' + CANCELLED = 'CANCELLED' + +class TESOutputFileLog(BaseModel): + """ + Information about all output files. Directory outputs are flattened into separate items. + + **Attributes:** + + - **url** (str): URL of the file in storage. + - **path** (str): Path of the file inside the container. Must be an absolute path. + - **size_bytes** (str): Size of the file in bytes. Note, this is currently coded as a string because official JSON doesn't support int64 numbers. + + **Reference:** https://ga4gh.github.io/task-execution-schemas/docs/#operation/GetTask + """ + + url: str + path: str + size_bytes: str + +class TESExecutorLog(BaseModel): + """ + Logs for each executor + + **Attributes:** + + - **start_time** (`Optional[str]`): Time the executor started, in RFC 3339 format. + - **end_time** (`Optional[str]`): Time the executor ended, in RFC 3339 format. + - **stdout** (`Optional[str]`): Stdout content. + - **stderr** (`Optional[str]`): Stderr content. + - **exit_code** (`int`): The exit code of the executor. + + **Reference:** https://ga4gh.github.io/task-execution-schemas/docs/#operation/GetTask + """ + + start_time: Optional[datetime] = None + end_time: Optional[datetime] = None + stdout: Optional[str] = None + stderr: Optional[str] = None + exit_code: int + + @validator('start_time', 'end_time') + def validate_datetime(value): + return convert_to_rfc3339_format(value) + +class TESExecutor(BaseModel): + """ + An array of executors to be run + + **Attributes:** + - **image** (`str`): Name of the container image. + - **command** (`list[str]`): A sequence of program arguments to execute, where the first argument is the program to execute. + - **workdir** (`Optional[str]`): The working directory that the command will be executed in. + - **stdout** (`Optional[str]`): Path inside the container to a file where the executor's stdout will be written to. Must be an absolute path + - **stderr** (`Optional[str]`): Path inside the container to a file where the executor's stderr will be written to. Must be an absolute path. + - **stdin** (`Optional[str]`): Path inside the container to a file which will be piped to the executor's stdin. Must be an absolute path. + - **env** (`Optional[dict[str, str]]`): Enviromental variables to set within the container + + **Reference:** https://ga4gh.github.io/task-execution-schemas/docs/#operation/GetTask + """ + image: str + command: list[str] + workdir: Optional[str] = None + stdout: Optional[str] = None + stderr: Optional[str] = None + stdin: Optional[str] = None + env: Optional[dict[str, str]] = None + + @validator("stdin", "stdout") + def validate_stdin_stdin(cls, value): + if not os.path.isabs(value): + raise ValueError(f"The '${value}' attribute must contain an absolute path.") + return value + +class TESResources(BaseModel): + """ + Represents the resources required by a TES task. + + **Attributes:** + + - **cpu_cores** (`Optional[int]`): Requested number of CPUs. + - **preemptible** (`Optional[bool]`): Define if the task is allowed to run on preemptible compute instances, for example, AWS Spot. + - **ram_gb** (`Optional[float]`): The amount of RAM in GB required. + - **disk_gb** (`Optional[float]`): The amount of disk space in GB required. + - **zones** (`Optional[list[str]]`): Request that the task be run in these compute zones. + + **Reference:** https://ga4gh.github.io/task-execution-schemas/docs/#operation/GetTask + """ + cpu_cores: Optional[int] = None + preemptible: Optional[bool] = None + ram_gb: Optional[float] = None + disk_gb: Optional[float] = None + zones: Optional[list[str]] = None + +class TESInput(BaseModel): + """ + Input files that will be used by the task. Inputs will be downloaded and mounted into the executor container as defined by the task request document. + + **Attributes:** + + - **name** (`Optional[str]`): The name of the input file. + - **description** (`Optional[str]`): A brief description of the input. + - **url** (`AnyUrl`): The URL of the input file. Must be an absolute path + - **path** (`str`): TPath of the file inside the container. Must be an absolute path. + - **type** (`TESFileType`): The type of input ('FILE' or 'DIRECTORY'). Default is 'FILE' + - **content** (`Optional[str]`): The content of the input file, if provided inline. + + Reference: https://ga4gh.github.io/task-execution-schemas/docs/#operation/GetTask + """ + name: Optional[str] = None + description: Optional[str] = None + url: Optional[AnyUrl] + path: str + type: TESFileType = TESFileType.FILE + content: Optional[str] = None + + @root_validator() + def validate_content_and_url(cls, values): + """ + - If content is set url should be ignored + - If content is not set then url should be present + """ + + content_is_set = values.get('content') and len(values.get('content').strip()) > 0 + url_is_set = values.get('url') and len(values.get('url').strip()) > 0 + + if content_is_set: + values['url'] = None + elif not content_is_set and not url_is_set: + raise ValueError("The 'url' attribute is required when the 'content' attribute is empty") + return values + + @validator("path") + def validate_path(cls, value): + if not os.path.isabs(value): + raise ValueError("The 'path' attribute must contain an absolute path.") + return value + +class TESOutput(BaseModel): + """ + Output files. Outputs will be uploaded from the executor container to long-term storage. + + **Attributes:** + + - **name** (`Optional[str]`): User-provided name of output file + - **description** (`Optional[str]`): Optional users provided description field, can be used for documentation. + - **url** (`AnyUrl`): URL for the file to be copied by the TES server after the task is complete + - **path** (`str`): Path of the file inside the container. Must be an absolute path. + - **type** (`TESFileType`): The type of output (e.g., FILE, DIRECTORY). + + Reference: https://ga4gh.github.io/task-execution-schemas/docs/#operation/GetTask + """ + name: Optional[str] = None + description: Optional[str] = None + url: AnyUrl + path: str + type: TESFileType = TESFileType.FILE + + + @validator("path") + def validate_path(cls, value): + if not os.path.isabs(value): + raise ValueError("The 'path' attribute must contain an absolute path.") + return value + +class TESTaskLog(BaseModel): + """ + Task logging information. Normally, this will contain only one entry, but in the case where a task fails and is retried, an entry will be appended to this list. + + **Attributes:** + + - **logs** (`list[TESExecutorLog]`): Logs for each executor. + - **metadata** (`Optional[dict[str, str]]`): Arbitrary logging metadata included by the implementation. + - **start_time** (`Optional[datetime]`): When the task started, in RFC 3339 format. + - **end_time** (`Optional[datetime]`): When the task ended, in RFC 3339 format. + - **outputs** (`list[TESOutputFileLog]`): Information about all output files. Directory outputs are flattened into separate items. + - **system_logs** (`Optional[list[str]]`): System logs are any logs the system decides are relevant, which are not tied directly to an Executor process. Content is implementation specific: format, size, etc. + - **status** (`Optional[str]`): The status of the task. + + **Reference:** [https://ga4gh.github.io/task-execution-schemas/docs/#operation/GetTask](https://ga4gh.github.io/task-execution-schemas/docs/#operation/GetTask) + """ + + logs: list[TESExecutorLog] + metadata: Optional[dict[str, str]] + start_time: Optional[datetime] + end_time: Optional[datetime] + outputs: list[TESOutputFileLog] + system_logs: Optional[list[str]] + + @validator('start_time', 'end_time') + def validate_datetime(value): + return convert_to_rfc3339_format(value) + +class TESData(BaseModel): + """ + Represents a TES task. + + **Attributes:** + + - **id** (`str`): Task identifier assigned by the server. + - **name** (`Optional[str]`): User-provided task name. + - **description** (`Optional[str]`): Optional user-provided description of task for documentation purposes. + - **creation_time** (`Optional[str]`): The time the task was created. + - **state** (`Optional[str]`): Task state as defined by the server + - **inputs** (`list[TESInput]`): Input files that will be used by the task. + - **outputs** (`list[TESOutput]`): Output files that will be uploaded from the executor container to long-term storage. + - **executors** (`list[Executor]`): An array of executors to be run. + - **resources** (`Optional[TESResources]`): The resources required by the TES task. + - **volumes** (`Optional[list[str]]`): Volumes are directories which may be used to share data between Executors.. + - **logs** (`Optional[list[TESLogs]]`): Task logging information + - **tags** (`Optional[[str, str]]`): A key-value map of arbitrary tags. + + **Reference:** [https://ga4gh.github.io/task-execution-schemas/docs/#operation/GetTask](https://ga4gh.github.io/task-execution-schemas/docs/#operation/GetTask) + """ + + id: str + name: Optional[str] = None + description: Optional[str] = None + creation_time: Optional[datetime] = None + state: Optional[TESState] = TESState.UNKNOWN + inputs: list[TESInput] + outputs: list[TESOutput] + executors: list[TESExecutor] + resources: Optional[TESResources] = None + volumes: Optional[list[str]] = None + logs: Optional[list[TESTaskLog]] = None + tags: Optional[dict[str, str]] = None + + @validator('creation_time') + def validate_datetime(value): + return convert_to_rfc3339_format(value) From 9f9f0ebef1152a107b490b9feac8c58e0759676f Mon Sep 17 00:00:00 2001 From: salihuDickson Date: Thu, 22 Aug 2024 23:31:28 +0100 Subject: [PATCH 2/6] install lefthook --- lefthook.yml | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 lefthook.yml diff --git a/lefthook.yml b/lefthook.yml new file mode 100644 index 0000000..f6e5dfe --- /dev/null +++ b/lefthook.yml @@ -0,0 +1,35 @@ +# EXAMPLE USAGE: +# +# Refer for explanation to following link: +# https://github.com/evilmartians/lefthook/blob/master/docs/configuration.md +# +# pre-push: +# commands: +# packages-audit: +# tags: frontend security +# run: yarn audit +# gems-audit: +# tags: backend security +# run: bundle audit +# +# pre-commit: +# parallel: true +# commands: +# eslint: +# glob: "*.{js,ts,jsx,tsx}" +# run: yarn eslint {staged_files} +# rubocop: +# tags: backend style +# glob: "*.rb" +# exclude: '(^|/)(application|routes)\.rb$' +# run: bundle exec rubocop --force-exclusion {all_files} +# govet: +# tags: backend style +# files: git ls-files -m +# glob: "*.go" +# run: go vet {files} +# scripts: +# "hello.js": +# runner: node +# "any.go": +# runner: go run From 7a8ad314fbf8ee62c0021122707dc421022694e9 Mon Sep 17 00:00:00 2001 From: Salihu <91833785+SalihuDickson@users.noreply.github.com> Date: Thu, 22 Aug 2024 23:33:28 +0100 Subject: [PATCH 3/6] Refactor/lint (#23) Co-authored-by: salihuDickson --- .github/workflows/ci.yml | 67 +++++------ crategen/cli.py | 33 ++++-- crategen/converters/abstract_converter.py | 3 +- crategen/converters/tes_converter.py | 10 +- crategen/models/tes_models.py | 138 ++++++++++++---------- crategen/validators.py | 41 ++++--- pyproject.toml | 17 ++- 7 files changed, 178 insertions(+), 131 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0d1b757..237b6f1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,43 +2,44 @@ name: CI on: push: - branches: [ main ] + branches: '*' pull_request: - branches: [ main ] + branches: '*' jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: '3.11' - - - name: Install Poetry - run: | - curl -sSL https://install.python-poetry.org | python3 - - poetry install - - - name: Lint with Ruff - run: | - poetry run ruff check crategen/ - - - name: Type check with Mypy - run: | - poetry run mypy crategen/ - - - name: Run security checks with Bandit - run: | - poetry run bandit -r crategen/ - - - name: Install test dependencies - run: | - poetry add pytest pytest-cov pytest-mock - - - name: Run tests - run: | - poetry run pytest --cov=crategen + - uses: actions/checkout@v2 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.11' + + - name: Install Poetry + run: | + curl -sSL https://install.python-poetry.org | python3 - + poetry install + + - name: Lint with Ruff + run: | + poetry run ruff check crategen/ + if: ${{ success() }} + + - name: Type check with Mypy + run: | + poetry run mypy crategen/ + + - name: Run security checks with Bandit + run: | + poetry run bandit -r crategen/ + + - name: Install test dependencies + run: | + poetry add pytest pytest-cov pytest-mock + + - name: Run tests + run: | + poetry run pytest --cov=crategen diff --git a/crategen/cli.py b/crategen/cli.py index 20198fb..3f207b2 100644 --- a/crategen/cli.py +++ b/crategen/cli.py @@ -1,30 +1,37 @@ -import click import json + +import click + from crategen.converter_manager import ConverterManager + @click.command() -@click.option('--input', prompt='Input file', help='Path to the input JSON file.') -@click.option('--output', prompt='Output file', help='Path to the output JSON file.') -@click.option('--conversion-type', prompt='Conversion type', type=click.Choice(['tes-to-wrroc', 'wes-to-wrroc']), help='Type of conversion to perform.') +@click.option("--input", prompt="Input file", help="Path to the input JSON file.") +@click.option("--output", prompt="Output file", help="Path to the output JSON file.") +@click.option( + "--conversion-type", + prompt="Conversion type", + type=click.Choice(["tes-to-wrroc", "wes-to-wrroc"]), + help="Type of conversion to perform.", +) def cli(input, output, conversion_type): - """ - Command Line Interface for converting TES/WES to WRROC. - """ + """Command Line Interface for converting TES/WES to WRROC.""" manager = ConverterManager() # Load input data from JSON file - with open(input, 'r') as input_file: + with open(input) as input_file: data = json.load(input_file) # Perform the conversion based on the specified type - if conversion_type == 'tes-to-wrroc': + if conversion_type == "tes-to-wrroc": result = manager.convert_tes_to_wrroc(data) - elif conversion_type == 'wes-to-wrroc': + elif conversion_type == "wes-to-wrroc": result = manager.convert_wes_to_wrroc(data) - + # Save the result to the output JSON file - with open(output, 'w') as output_file: + with open(output, "w") as output_file: json.dump(result, output_file, indent=4) -if __name__ == '__main__': + +if __name__ == "__main__": cli() diff --git a/crategen/converters/abstract_converter.py b/crategen/converters/abstract_converter.py index d0cd0ee..f686557 100644 --- a/crategen/converters/abstract_converter.py +++ b/crategen/converters/abstract_converter.py @@ -1,10 +1,11 @@ from abc import ABC, abstractmethod + class AbstractConverter(ABC): @abstractmethod def convert_to_wrroc(self, data): """Convert data to WRROC format""" - + @abstractmethod def convert_from_wrroc(self, wrroc_data): """Convert WRROC data to the original format""" diff --git a/crategen/converters/tes_converter.py b/crategen/converters/tes_converter.py index b6e9c15..8ca059d 100644 --- a/crategen/converters/tes_converter.py +++ b/crategen/converters/tes_converter.py @@ -1,17 +1,17 @@ -from .abstract_converter import AbstractConverter +from pydantic import ValidationError + from ..models.tes_models import TESData from ..models.wrroc_models import WRROCDataTES -from pydantic import ValidationError +from .abstract_converter import AbstractConverter class TESConverter(AbstractConverter): - def convert_to_wrroc(self, tes_data): # Validate TES data try: validated_tes_data = TESData(**tes_data) except ValidationError as e: - raise ValueError(f"Invalid TES data: {e}") + raise ValueError(f"Invalid TES data: {e}") from e # Extract validated data ( @@ -53,7 +53,7 @@ def convert_from_wrroc(self, data): try: validated_data = WRROCDataTES(**data) except ValidationError as e: - raise ValueError(f"Invalid WRROC data for TES conversion: {e}") + raise ValueError(f"Invalid WRROC data for TES conversion: {e}") from e # Extract validated data id = validated_data.id diff --git a/crategen/models/tes_models.py b/crategen/models/tes_models.py index 9e459e2..407a9c0 100644 --- a/crategen/models/tes_models.py +++ b/crategen/models/tes_models.py @@ -1,24 +1,29 @@ -from pydantic import BaseModel, AnyUrl, root_validator, validator -from ..utils import convert_to_rfc3339_format +import os from datetime import datetime -from typing import Optional from enum import Enum -import os +from typing import Optional + +from pydantic import AnyUrl, BaseModel, root_validator, validator + +from ..utils import convert_to_rfc3339_format + class TESFileType(str, Enum): FILE = "FILE" DIRECTORY = "DIRECTORY" + class TESState(str, Enum): - UNKNOWN = 'UNKNOWN' - QUEUED = 'QUEUED' - INITIALIZING = 'INITIALIZING' - RUNNING = 'RUNNING' - PAUSED = 'PAUSED' - COMPLETE = 'COMPLETE' - EXECUTOR_ERROR = 'EXECUTOR_ERROR' - SYSTEM_ERROR = 'SYSTEM_ERROR' - CANCELLED = 'CANCELLED' + UNKNOWN = "UNKNOWN" + QUEUED = "QUEUED" + INITIALIZING = "INITIALIZING" + RUNNING = "RUNNING" + PAUSED = "PAUSED" + COMPLETE = "COMPLETE" + EXECUTOR_ERROR = "EXECUTOR_ERROR" + SYSTEM_ERROR = "SYSTEM_ERROR" + CANCELLED = "CANCELLED" + class TESOutputFileLog(BaseModel): """ @@ -34,9 +39,10 @@ class TESOutputFileLog(BaseModel): """ url: str - path: str + path: str size_bytes: str + class TESExecutorLog(BaseModel): """ Logs for each executor @@ -58,10 +64,11 @@ class TESExecutorLog(BaseModel): stderr: Optional[str] = None exit_code: int - @validator('start_time', 'end_time') + @validator("start_time", "end_time") def validate_datetime(value): - return convert_to_rfc3339_format(value) - + return convert_to_rfc3339_format(value) + + class TESExecutor(BaseModel): """ An array of executors to be run @@ -72,11 +79,12 @@ class TESExecutor(BaseModel): - **workdir** (`Optional[str]`): The working directory that the command will be executed in. - **stdout** (`Optional[str]`): Path inside the container to a file where the executor's stdout will be written to. Must be an absolute path - **stderr** (`Optional[str]`): Path inside the container to a file where the executor's stderr will be written to. Must be an absolute path. - - **stdin** (`Optional[str]`): Path inside the container to a file which will be piped to the executor's stdin. Must be an absolute path. + - **stdin** (`Optional[str]`): Path inside the container to a file which will be piped to the executor's stdin. Must be an absolute path. - **env** (`Optional[dict[str, str]]`): Enviromental variables to set within the container **Reference:** https://ga4gh.github.io/task-execution-schemas/docs/#operation/GetTask """ + image: str command: list[str] workdir: Optional[str] = None @@ -87,9 +95,10 @@ class TESExecutor(BaseModel): @validator("stdin", "stdout") def validate_stdin_stdin(cls, value): - if not os.path.isabs(value): - raise ValueError(f"The '${value}' attribute must contain an absolute path.") - return value + if not os.path.isabs(value): + raise ValueError(f"The '${value}' attribute must contain an absolute path.") + return value + class TESResources(BaseModel): """ @@ -105,12 +114,14 @@ class TESResources(BaseModel): **Reference:** https://ga4gh.github.io/task-execution-schemas/docs/#operation/GetTask """ + cpu_cores: Optional[int] = None preemptible: Optional[bool] = None ram_gb: Optional[float] = None disk_gb: Optional[float] = None zones: Optional[list[str]] = None + class TESInput(BaseModel): """ Input files that will be used by the task. Inputs will be downloaded and mounted into the executor container as defined by the task request document. @@ -126,35 +137,40 @@ class TESInput(BaseModel): Reference: https://ga4gh.github.io/task-execution-schemas/docs/#operation/GetTask """ + name: Optional[str] = None description: Optional[str] = None url: Optional[AnyUrl] path: str type: TESFileType = TESFileType.FILE - content: Optional[str] = None + content: Optional[str] = None @root_validator() def validate_content_and_url(cls, values): """ - - If content is set url should be ignored - - If content is not set then url should be present + - If content is set url should be ignored + - If content is not set then url should be present """ + content_is_set = ( + values.get("content") and len(values.get("content").strip()) > 0 + ) + url_is_set = values.get("url") and len(values.get("url").strip()) > 0 - content_is_set = values.get('content') and len(values.get('content').strip()) > 0 - url_is_set = values.get('url') and len(values.get('url').strip()) > 0 - if content_is_set: - values['url'] = None + values["url"] = None elif not content_is_set and not url_is_set: - raise ValueError("The 'url' attribute is required when the 'content' attribute is empty") + raise ValueError( + "The 'url' attribute is required when the 'content' attribute is empty" + ) return values - + @validator("path") def validate_path(cls, value): if not os.path.isabs(value): raise ValueError("The 'path' attribute must contain an absolute path.") return value + class TESOutput(BaseModel): """ Output files. Outputs will be uploaded from the executor container to long-term storage. @@ -169,46 +185,48 @@ class TESOutput(BaseModel): Reference: https://ga4gh.github.io/task-execution-schemas/docs/#operation/GetTask """ + name: Optional[str] = None description: Optional[str] = None url: AnyUrl path: str type: TESFileType = TESFileType.FILE - @validator("path") def validate_path(cls, value): if not os.path.isabs(value): raise ValueError("The 'path' attribute must contain an absolute path.") return value + class TESTaskLog(BaseModel): - """ - Task logging information. Normally, this will contain only one entry, but in the case where a task fails and is retried, an entry will be appended to this list. - - **Attributes:** - - - **logs** (`list[TESExecutorLog]`): Logs for each executor. - - **metadata** (`Optional[dict[str, str]]`): Arbitrary logging metadata included by the implementation. - - **start_time** (`Optional[datetime]`): When the task started, in RFC 3339 format. - - **end_time** (`Optional[datetime]`): When the task ended, in RFC 3339 format. - - **outputs** (`list[TESOutputFileLog]`): Information about all output files. Directory outputs are flattened into separate items. - - **system_logs** (`Optional[list[str]]`): System logs are any logs the system decides are relevant, which are not tied directly to an Executor process. Content is implementation specific: format, size, etc. - - **status** (`Optional[str]`): The status of the task. - - **Reference:** [https://ga4gh.github.io/task-execution-schemas/docs/#operation/GetTask](https://ga4gh.github.io/task-execution-schemas/docs/#operation/GetTask) - """ - - logs: list[TESExecutorLog] - metadata: Optional[dict[str, str]] - start_time: Optional[datetime] - end_time: Optional[datetime] - outputs: list[TESOutputFileLog] - system_logs: Optional[list[str]] - - @validator('start_time', 'end_time') - def validate_datetime(value): - return convert_to_rfc3339_format(value) + """ + Task logging information. Normally, this will contain only one entry, but in the case where a task fails and is retried, an entry will be appended to this list. + + **Attributes:** + + - **logs** (`list[TESExecutorLog]`): Logs for each executor. + - **metadata** (`Optional[dict[str, str]]`): Arbitrary logging metadata included by the implementation. + - **start_time** (`Optional[datetime]`): When the task started, in RFC 3339 format. + - **end_time** (`Optional[datetime]`): When the task ended, in RFC 3339 format. + - **outputs** (`list[TESOutputFileLog]`): Information about all output files. Directory outputs are flattened into separate items. + - **system_logs** (`Optional[list[str]]`): System logs are any logs the system decides are relevant, which are not tied directly to an Executor process. Content is implementation specific: format, size, etc. + - **status** (`Optional[str]`): The status of the task. + + **Reference:** [https://ga4gh.github.io/task-execution-schemas/docs/#operation/GetTask](https://ga4gh.github.io/task-execution-schemas/docs/#operation/GetTask) + """ + + logs: list[TESExecutorLog] + metadata: Optional[dict[str, str]] + start_time: Optional[datetime] + end_time: Optional[datetime] + outputs: list[TESOutputFileLog] + system_logs: Optional[list[str]] + + @validator("start_time", "end_time") + def validate_datetime(value): + return convert_to_rfc3339_format(value) + class TESData(BaseModel): """ @@ -221,7 +239,7 @@ class TESData(BaseModel): - **description** (`Optional[str]`): Optional user-provided description of task for documentation purposes. - **creation_time** (`Optional[str]`): The time the task was created. - **state** (`Optional[str]`): Task state as defined by the server - - **inputs** (`list[TESInput]`): Input files that will be used by the task. + - **inputs** (`list[TESInput]`): Input files that will be used by the task. - **outputs** (`list[TESOutput]`): Output files that will be uploaded from the executor container to long-term storage. - **executors** (`list[Executor]`): An array of executors to be run. - **resources** (`Optional[TESResources]`): The resources required by the TES task. @@ -245,6 +263,6 @@ class TESData(BaseModel): logs: Optional[list[TESTaskLog]] = None tags: Optional[dict[str, str]] = None - @validator('creation_time') + @validator("creation_time") def validate_datetime(value): - return convert_to_rfc3339_format(value) + return convert_to_rfc3339_format(value) diff --git a/crategen/validators.py b/crategen/validators.py index af19e8d..1ab6f93 100644 --- a/crategen/validators.py +++ b/crategen/validators.py @@ -1,28 +1,31 @@ -from pydantic import ValidationError from typing import Union -from .models import WRROCProcess, WRROCWorkflow, WRROCProvenance from urllib.parse import urlparse +from pydantic import ValidationError + +from .models import WRROCProcess, WRROCProvenance, WRROCWorkflow + + def validate_wrroc(data: dict) -> Union[WRROCProvenance, WRROCWorkflow, WRROCProcess]: """ Validate that the input data is a valid WRROC entity and determine which profile it adheres to. - + This function attempts to validate the input data against the WRROCProvenance model first. If that validation fails, it attempts validation against the WRROCWorkflow model. If that also fails, it finally attempts validation against the WRROCProcess model. - + Args: data (dict): The input data to validate. - + Returns: Union[WRROCProvenance, WRROCWorkflow, WRROCProcess]: The validated WRROC data, indicating the highest profile the data adheres to. - + Raises: ValueError: If the data does not adhere to any of the WRROC profiles. """ # Convert '@id' to 'id' for validation purposes - if '@id' in data: - data['id'] = data.pop('@id') + if "@id" in data: + data["id"] = data.pop("@id") errors = [] @@ -42,6 +45,7 @@ def validate_wrroc(data: dict) -> Union[WRROCProvenance, WRROCWorkflow, WRROCPro errors.extend(e.errors()) raise ValueError(f"Invalid WRROC data: {errors}") + def validate_wrroc_tes(data: dict) -> WRROCProcess: """ Validate that the input data contains the fields required for WRROC to TES conversion. @@ -61,13 +65,18 @@ def validate_wrroc_tes(data: dict) -> WRROCProcess: validated_data = validate_wrroc(data) required_fields = ["id", "name", "object", "result"] - missing_fields = [field for field in required_fields if getattr(validated_data, field) is None] + missing_fields = [ + field for field in required_fields if getattr(validated_data, field) is None + ] if missing_fields: - raise ValueError(f"Missing required field(s) for TES conversion: {', '.join(missing_fields)}") + raise ValueError( + f"Missing required field(s) for TES conversion: {', '.join(missing_fields)}" + ) return validated_data + def validate_wrroc_wes(data: dict) -> WRROCWorkflow: """ Validate that the input data contains the fields required for WRROC to WES conversion. @@ -91,19 +100,21 @@ def validate_wrroc_wes(data: dict) -> WRROCWorkflow: required_fields = ["id", "name", "workflowType", "workflowVersion", "result"] - missing_fields = [field for field in required_fields if getattr(validated_data, field) is None] + missing_fields = [ + field for field in required_fields if getattr(validated_data, field) is None + ] if missing_fields: - raise ValueError(f"Missing required field(s) for WES conversion: {', '.join(missing_fields)}") + raise ValueError( + f"Missing required field(s) for WES conversion: {', '.join(missing_fields)}" + ) # Validate URLs in the result field, only if result is not None if validated_data.result is not None: for result in validated_data.result: - url = result['id'] + url = result["id"] parsed_url = urlparse(url) if not all([parsed_url.scheme, parsed_url.netloc]): raise ValueError(f"Invalid URL in result: {url}") return validated_data - - diff --git a/pyproject.toml b/pyproject.toml index 1f82a41..0057e22 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,8 @@ pytest-mock = "^3.14.0" [tool.poetry.group.types.dependencies] mypy = "^1.10.1" + + [tool.poetry.scripts] crategen = "crategen.cli:cli" @@ -61,9 +63,13 @@ skips = [ [tool.ruff] exclude = [ - "tests/*", - "tests/unit/*", - "crategen/*" + ".git", + "/.pytest_cache", + "__pycache__", + "build", + "_build", + "dist", + ".env" ] indent-width = 4 @@ -72,11 +78,12 @@ docstring-code-format = true indent-style = "space" line-ending = "lf" quote-style = "double" +skip-magic-trailing-comma = true + [tool.ruff.lint] select = [ "B", # flake8-bugbear - "D", # pydocstyle "E", # pycodestyle "F", # Pyflakes "I", # isort @@ -84,9 +91,11 @@ select = [ "SIM", # flake8-simplify "UP", # pyupgrade ] +ignore = ["E203", "E501"] [tool.ruff.lint.pydocstyle] convention = "google" [tool.typos.default.extend-words] mke = 'mke' + From 998a902fbe793290606df652f4da3d51405324ea Mon Sep 17 00:00:00 2001 From: salihuDickson Date: Thu, 22 Aug 2024 23:37:29 +0100 Subject: [PATCH 4/6] re-implement utils module --- crategen/converters/tes_converter.py | 2 +- crategen/converters/wes_converter.py | 2 +- crategen/utils.py | 34 ++++++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 2 deletions(-) create mode 100644 crategen/utils.py diff --git a/crategen/converters/tes_converter.py b/crategen/converters/tes_converter.py index b6e9c15..bac25bf 100644 --- a/crategen/converters/tes_converter.py +++ b/crategen/converters/tes_converter.py @@ -1,6 +1,6 @@ from .abstract_converter import AbstractConverter from ..models.tes_models import TESData -from ..models.wrroc_models import WRROCDataTES +from ..models import WRROCDataTES from pydantic import ValidationError diff --git a/crategen/converters/wes_converter.py b/crategen/converters/wes_converter.py index edd438b..94c32cd 100644 --- a/crategen/converters/wes_converter.py +++ b/crategen/converters/wes_converter.py @@ -1,5 +1,5 @@ from .abstract_converter import AbstractConverter -from .utils import convert_to_iso8601 +from ..utils import convert_to_iso8601 from ..models import WESData, WRROCDataWES from pydantic import ValidationError diff --git a/crategen/utils.py b/crategen/utils.py new file mode 100644 index 0000000..cd9fa6d --- /dev/null +++ b/crategen/utils.py @@ -0,0 +1,34 @@ +import datetime + + +def convert_to_iso8601(timestamp): + """ + Convert a given timestamp to ISO 8601 format. + Handles multiple formats including RFC 3339, ISO 8601 with and without fractional seconds. + + Args: + timestamp (str): The timestamp to be converted. + + Returns: + str: The converted timestamp in ISO 8601 format, or None if the input format is incorrect. + """ + if timestamp: + # List of supported formats + formats = [ + "%Y-%m-%dT%H:%M:%S.%fZ", # RFC 3339 with fractional seconds + "%Y-%m-%dT%H:%M:%SZ", # RFC 3339 without fractional seconds + "%Y-%m-%dT%H:%M:%S%z", # ISO 8601 with timezone + "%Y-%m-%dT%H:%M:%S.%f%z", # ISO 8601 with fractional seconds and timezone + ] + for fmt in formats: + try: + return datetime.datetime.strptime(timestamp, fmt).isoformat() + "Z" + except ValueError: + continue + # Handle incorrect format or other issues + return None + return None + + +def convert_to_rfc3339_format(date_time: datetime.datetime): + return date_time.isoformat("T") + "Z" From 21b7c131dab1e6c7c870a2c2e9ecc143d5c9e21a Mon Sep 17 00:00:00 2001 From: salihuDickson Date: Sat, 24 Aug 2024 23:16:15 +0100 Subject: [PATCH 5/6] split data models --- crategen/converters/tes_converter.py | 4 +- crategen/converters/wes_converter.py | 17 +- crategen/models.py | 363 --------------------------- crategen/models/tes_models.py | 2 +- crategen/models/wes_models.py | 91 +++++++ crategen/models/wrroc_models.py | 154 ++++++++++++ crategen/validators.py | 8 +- tests/unit/test_wrroc_models.py | 136 +++++----- 8 files changed, 347 insertions(+), 428 deletions(-) create mode 100644 crategen/models/wes_models.py create mode 100644 crategen/models/wrroc_models.py diff --git a/crategen/converters/tes_converter.py b/crategen/converters/tes_converter.py index 1d588b5..8c0262a 100644 --- a/crategen/converters/tes_converter.py +++ b/crategen/converters/tes_converter.py @@ -1,6 +1,6 @@ from .abstract_converter import AbstractConverter from ..models.tes_models import TESData -from ..models import WRROCDataTES +from ..models.wrroc_models import WRROCDataTES from pydantic import ValidationError @@ -39,7 +39,7 @@ def convert_to_wrroc(self, data: dict) -> dict: logs, tags, ) = data_tes.dict().values() - end_time = data_tes.logs[0].end_time + end_time = logs[0].end_time # Convert to WRROC format wrroc_data = { diff --git a/crategen/converters/wes_converter.py b/crategen/converters/wes_converter.py index 6cc8687..5a5bb5c 100644 --- a/crategen/converters/wes_converter.py +++ b/crategen/converters/wes_converter.py @@ -1,8 +1,10 @@ from .abstract_converter import AbstractConverter from ..utils import convert_to_iso8601 -from ..models import WESData, WRROCDataWES +from ..models.wes_models import WESData +from ..models.wrroc_models import WRROCDataWES from pydantic import ValidationError + class WESConverter(AbstractConverter): def convert_to_wrroc(self, data: dict) -> dict: """ @@ -30,7 +32,10 @@ def convert_to_wrroc(self, data: dict) -> dict: "status": data_wes.state, "startTime": convert_to_iso8601(data_wes.run_log.start_time), "endTime": convert_to_iso8601(data_wes.run_log.end_time), - "result": [{"@id": output.location, "name": output.name} for output in data_wes.outputs], + "result": [ + {"@id": output.location, "name": output.name} + for output in data_wes.outputs + ], } return wrroc_data @@ -51,7 +56,9 @@ def convert_from_wrroc(self, data: dict) -> dict: try: data_wrroc = WRROCDataWES(**data) except ValidationError as e: - raise ValueError(f"Invalid WRROC data for WES conversion: {e.errors()}") from e + raise ValueError( + f"Invalid WRROC data for WES conversion: {e.errors()}" + ) from e # Convert from WRROC to WES format wes_data = { @@ -62,6 +69,8 @@ def convert_from_wrroc(self, data: dict) -> dict: "end_time": data_wrroc.endTime, }, "state": data_wrroc.status, - "outputs": [{"location": res.id, "name": res.name} for res in data_wrroc.result], + "outputs": [ + {"location": res.id, "name": res.name} for res in data_wrroc.result + ], } return wes_data diff --git a/crategen/models.py b/crategen/models.py index 92c133e..e69de29 100644 --- a/crategen/models.py +++ b/crategen/models.py @@ -1,363 +0,0 @@ -from pydantic import BaseModel, AnyUrl, Field, root_validator -from typing import Optional - - -class Executor(BaseModel): - """ - Represents an executor in the Task Execution Service (TES). - - Attributes: - image (str): The Docker image to be used. - command (list[str]): The command to be executed. - workdir (Optional[str]): The working directory for the command. - stdout (Optional[str]): The path to the stdout log. - stderr (Optional[str]): The path to the stderr log. - stdin (Optional[str]): The path to the stdin input. - env (Optional[dict[str, str]]): Environment variables for the command. - """ - image: str - command: list[str] - workdir: Optional[str] = None - stdout: Optional[str] = None - stderr: Optional[str] = None - stdin: Optional[str] = None - env: Optional[dict[str, str]] = None - - -class TESResources(BaseModel): - """ - Represents the resources required by a TES task. - - Attributes: - cpu_cores (Optional[int]): The number of CPU cores required. - preemptible (Optional[bool]): Whether the task can run on preemptible instances. - ram_gb (Optional[float]): The amount of RAM in GB required. - disk_gb (Optional[float]): The amount of disk space in GB required. - zones (Optional[list[str]]): The zones where the task can run. - """ - cpu_cores: Optional[int] = None - preemptible: Optional[bool] = None - ram_gb: Optional[float] = None - disk_gb: Optional[float] = None - zones: Optional[list[str]] = None - - -class TESInputs(BaseModel): - """ - Represents input files in TES. - - Attributes: - name (Optional[str]): The name of the input file. - description (Optional[str]): A brief description of the input. - url (AnyUrl): The URL of the input file. - path (str): The path where the input file should be placed. - type (Optional[str]): The type of input (e.g., FILE, DIRECTORY). - content (Optional[str]): The content of the input file, if provided inline. - """ - name: Optional[str] = None - description: Optional[str] = None - url: AnyUrl - path: str - type: Optional[str] = None - content: Optional[str] = None - - -class TESOutputs(BaseModel): - """ - Represents output files in TES. - - Attributes: - name (Optional[str]): The name of the output file. - description (Optional[str]): A brief description of the output. - url (AnyUrl): The URL of the output file. - path (str): The path where the output file is stored. - type (Optional[str]): The type of output (e.g., FILE, DIRECTORY). - """ - name: Optional[str] = None - description: Optional[str] = None - url: AnyUrl - path: str - type: Optional[str] = None - - -class TESLogs(BaseModel): - """ - Represents logs in TES. - - Attributes: - start_time (Optional[str]): The time the task started. - end_time (Optional[str]): The time the task ended. - stdout (Optional[str]): The path to the stdout log. - stderr (Optional[str]): The path to the stderr log. - exit_code (Optional[int]): The exit code of the task. - host_ip (Optional[str]): The IP address of the host running the task. - metadata (Optional[dict[str, str]]): Additional metadata associated with the task. - """ - start_time: Optional[str] = None - end_time: Optional[str] = None - stdout: Optional[str] = None - stderr: Optional[str] = None - exit_code: Optional[int] = None - host_ip: Optional[str] = None - metadata: Optional[dict[str, str]] = None - - -class TESData(BaseModel): - """ - Represents a TES task. - - Attributes: - id (str): The unique identifier for the TES task. - name (Optional[str]): The name of the TES task. - description (Optional[str]): A brief description of the TES task. - creation_time (Optional[str]): The time the task was created. - state (Optional[str]): The current state of the task. - inputs (list[TESInputs]): The inputs to the TES task. - outputs (list[TESOutputs]): The outputs of the TES task. - executors (list[Executor]): The executors associated with the TES task. - resources (Optional[TESResources]): The resources required by the TES task. - volumes (Optional[list[str]]): The volumes to be mounted in the task. - logs (Optional[list[TESLogs]]): Logs associated with the TES task. - tags (Optional[dict[str, str]]): Tags associated with the task. - error (Optional[dict[str, str]]): Error information if the task failed. - """ - id: str - name: Optional[str] = None - description: Optional[str] = None - creation_time: Optional[str] = None - state: Optional[str] = None - inputs: list[TESInputs] - outputs: list[TESOutputs] - executors: list[Executor] - resources: Optional[TESResources] = None - volumes: Optional[list[str]] = None - logs: Optional[list[TESLogs]] = None - tags: Optional[dict[str, str]] = None - error: Optional[dict[str, str]] = None - - class Config: - extra = "allow" - - -class WESRunLog(BaseModel): - """ - Represents a run log in the Workflow Execution Service (WES). - - Attributes: - name (Optional[str]): The name of the run. - start_time (Optional[str]): The start time of the run. - end_time (Optional[str]): The end time of the run. - cmd (Optional[list[str]]): The command executed in the run. - stdout (Optional[str]): The path to the stdout log. - stderr (Optional[str]): The path to the stderr log. - exit_code (Optional[int]): The exit code of the run. - tes_logs_url (Optional[str]): The URL of the TES logs. - """ - name: Optional[str] = None - start_time: Optional[str] = None - end_time: Optional[str] = None - cmd: Optional[list[str]] = None - stdout: Optional[str] = None - stderr: Optional[str] = None - exit_code: Optional[int] = None - tes_logs_url: Optional[str] = None - - -class WESOutputs(BaseModel): - """ - Represents output files in WES. - - Attributes: - location (str): The URL of the output file. - name (str): The name of the output file. - """ - location: str - name: str - - -class WESRequest(BaseModel): - """ - Represents a workflow request in WES. - - Attributes: - workflow_params (dict[str, str]): The parameters for the workflow. - workflow_type (str): The type of the workflow (e.g., CWL). - workflow_type_version (str): The version of the workflow type. - tags (Optional[dict[str, str]]): Additional tags associated with the workflow. - """ - workflow_params: dict[str, str] - workflow_type: str - workflow_type_version: str - tags: Optional[dict[str, str]] = None - - -class WESData(BaseModel): - """ - Represents a WES run. - - Attributes: - run_id (str): The unique identifier for the WES run. - request (WESRequest): The request associated with the WES run. - state (str): The state of the WES run. - run_log (WESRunLog): The log of the WES run. - task_logs (Optional[list[WESRunLog]]): The logs of individual tasks within the run. - outputs (list[WESOutputs]): The outputs of the WES run. - """ - run_id: str - request: WESRequest - state: str - run_log: WESRunLog - task_logs: Optional[list[WESRunLog]] = Field(None, description="This field is deprecated. Use tes_logs_url instead.") - outputs: list[WESOutputs] - - class Config: - extra = "allow" - - @root_validator - def check_deprecated_fields(cls, values): - if values.get('task_logs') is not None: - print("DeprecationWarning: The 'task_logs' field is deprecated and will be removed in future versions. Use 'tes_logs_url' instead.") - return values - -class WRROCInputs(BaseModel): - """ - A model representing inputs in WRROC. - - Attributes: - id (str): The unique identifier for the input. - name (str): The name of the input. - """ - id: str - name: str - - -class WRROCOutputs(BaseModel): - """ - A model representing outputs in WRROC. - - Attributes: - id (str): The unique identifier for the output. - name (str): The name of the output. - """ - id: str - name: str - - -class WRROCDataBase(BaseModel): - """ - A base model representing common fields for WRROC entities. - - Attributes: - id (str): The unique identifier for the WRROC entity. - name (str): The name of the WRROC entity. - description (Optional[str]): A brief description of the WRROC entity. - instrument (Optional[str]): The instrument used in the WRROC entity. - object (list[WRROCInputs]): A list of input objects related to the WRROC entity. - result (list[WRROCOutputs]): A list of output results related to the WRROC entity. - startTime (Optional[str]): The start time of the WRROC entity. - endTime (Optional[str]): The end time of the WRROC entity. - version (Optional[str]): The version of the WRROC entity. - """ - id: str - name: str - description: Optional[str] = "" - instrument: Optional[str] = None - object: list[WRROCInputs] - result: list[WRROCOutputs] - startTime: Optional[str] = None - endTime: Optional[str] = None - version: Optional[str] = None - - class Config: - extra = "allow" - - -class WRROCData(WRROCDataBase): - """ - A model representing a WRROC entity, inheriting from WRROCDataBase. - """ - pass - - -class WRROCDataTES(WRROCDataBase): - """ - A model representing WRROC data specifically for TES conversion. - - This model inherits from WRROCDataBase and includes all the necessary fields required for TES conversion. - """ - pass - - -class WRROCDataWES(WRROCDataBase): - """ - A model representing WRROC data specifically for WES conversion. - - This model inherits from WRROCDataBase and includes additional fields required for WES conversion. - """ - status: str - - -class WRROCProcess(BaseModel): - """ - A model representing the WRROC Process Run profile. - - Attributes: - id (str): The unique identifier for the WRROC entity. - name (str): The name of the WRROC entity. - description (Optional[str]): A brief description of the WRROC entity. - startTime (Optional[str]): The start time of the process. - endTime (Optional[str]): The end time of the process. - object (Optional[list[dict[str, str]]]): A list of input objects related to the process. - profiles (Optional[list[AnyUrl]]): URLs to the RO-Crate profiles used. - """ - id: str - name: str - description: Optional[str] = "" - startTime: Optional[str] = None - endTime: Optional[str] = None - object: Optional[list[dict[str, str]]] = None - profiles: Optional[list[AnyUrl]] = None - - class Config: - extra = "allow" - - -class WRROCWorkflow(WRROCProcess): - """ - A model representing the WRROC Workflow Run profile, inheriting from WRROCProcess. - - Attributes: - workflowType (Optional[str]): The type of the workflow. - workflowVersion (Optional[str]): The version of the workflow. - result (Optional[list[dict[str, str]]]): A list of output results related to the workflow. - hasPart (Optional[list[AnyUrl]]): A list of parts or steps within the workflow. - """ - workflowType: Optional[str] = None - workflowVersion: Optional[str] = None - result: Optional[list[dict[str, str]]] = None - hasPart: Optional[list[AnyUrl]] = None - - class Config: - extra = "allow" - - -class WRROCProvenance(WRROCWorkflow): - """ - A model representing the WRROC Provenance Run profile, inheriting from WRROCWorkflow. - - Attributes: - provenanceData (Optional[str]): Data related to the provenance of the workflow. - agents (Optional[list[dict[str, str]]]): A list of agents involved in the workflow. - activity (Optional[list[dict[str, str]]]): Activities related to the provenance. - generatedBy (Optional[list[AnyUrl]]): URLs of the entities that generated the data. - used (Optional[list[AnyUrl]]): URLs of the entities that were used in the data generation. - """ - provenanceData: Optional[str] = None - agents: Optional[list[dict[str, str]]] = None - activity: Optional[list[dict[str, str]]] = None - generatedBy: Optional[list[AnyUrl]] = None - used: Optional[list[AnyUrl]] = None - - class Config: - extra = "allow" - diff --git a/crategen/models/tes_models.py b/crategen/models/tes_models.py index 407a9c0..fed8d2c 100644 --- a/crategen/models/tes_models.py +++ b/crategen/models/tes_models.py @@ -158,7 +158,7 @@ def validate_content_and_url(cls, values): if content_is_set: values["url"] = None - elif not content_is_set and not url_is_set: + elif not url_is_set: raise ValueError( "The 'url' attribute is required when the 'content' attribute is empty" ) diff --git a/crategen/models/wes_models.py b/crategen/models/wes_models.py new file mode 100644 index 0000000..0d5d569 --- /dev/null +++ b/crategen/models/wes_models.py @@ -0,0 +1,91 @@ +from pydantic import BaseModel, Field, root_validator +from typing import Optional + + +class WESRunLog(BaseModel): + """ + Represents a run log in the Workflow Execution Service (WES). + + Attributes: + name (Optional[str]): The name of the run. + start_time (Optional[str]): The start time of the run. + end_time (Optional[str]): The end time of the run. + cmd (Optional[list[str]]): The command executed in the run. + stdout (Optional[str]): The path to the stdout log. + stderr (Optional[str]): The path to the stderr log. + exit_code (Optional[int]): The exit code of the run. + tes_logs_url (Optional[str]): The URL of the TES logs. + """ + + name: Optional[str] = None + start_time: Optional[str] = None + end_time: Optional[str] = None + cmd: Optional[list[str]] = None + stdout: Optional[str] = None + stderr: Optional[str] = None + exit_code: Optional[int] = None + tes_logs_url: Optional[str] = None + + +class WESOutputs(BaseModel): + """ + Represents output files in WES. + + Attributes: + location (str): The URL of the output file. + name (str): The name of the output file. + """ + + location: str + name: str + + +class WESRequest(BaseModel): + """ + Represents a workflow request in WES. + + Attributes: + workflow_params (dict[str, str]): The parameters for the workflow. + workflow_type (str): The type of the workflow (e.g., CWL). + workflow_type_version (str): The version of the workflow type. + tags (Optional[dict[str, str]]): Additional tags associated with the workflow. + """ + + workflow_params: dict[str, str] + workflow_type: str + workflow_type_version: str + tags: Optional[dict[str, str]] = None + + +class WESData(BaseModel): + """ + Represents a WES run. + + Attributes: + run_id (str): The unique identifier for the WES run. + request (WESRequest): The request associated with the WES run. + state (str): The state of the WES run. + run_log (WESRunLog): The log of the WES run. + task_logs (Optional[list[WESRunLog]]): The logs of individual tasks within the run. + outputs (list[WESOutputs]): The outputs of the WES run. + """ + + run_id: str + request: WESRequest + state: str + run_log: WESRunLog + task_logs: Optional[list[WESRunLog]] = Field( + None, description="This field is deprecated. Use tes_logs_url instead." + ) + outputs: list[WESOutputs] + + class Config: + extra = "allow" + + @root_validator + def check_deprecated_fields(cls, values): + if values.get("task_logs") is not None: + print( + "DeprecationWarning: The 'task_logs' field is deprecated and will be removed in future versions. Use 'tes_logs_url' instead." + ) + return values diff --git a/crategen/models/wrroc_models.py b/crategen/models/wrroc_models.py new file mode 100644 index 0000000..669dfde --- /dev/null +++ b/crategen/models/wrroc_models.py @@ -0,0 +1,154 @@ +from pydantic import BaseModel, AnyUrl +from typing import Optional + + +class WRROCInputs(BaseModel): + """ + A model representing inputs in WRROC. + + Attributes: + id (str): The unique identifier for the input. + name (str): The name of the input. + """ + + id: str + name: str + + +class WRROCOutputs(BaseModel): + """ + A model representing outputs in WRROC. + + Attributes: + id (str): The unique identifier for the output. + name (str): The name of the output. + """ + + id: str + name: str + + +class WRROCDataBase(BaseModel): + """ + A base model representing common fields for WRROC entities. + + Attributes: + id (str): The unique identifier for the WRROC entity. + name (str): The name of the WRROC entity. + description (Optional[str]): A brief description of the WRROC entity. + instrument (Optional[str]): The instrument used in the WRROC entity. + object (list[WRROCInputs]): A list of input objects related to the WRROC entity. + result (list[WRROCOutputs]): A list of output results related to the WRROC entity. + startTime (Optional[str]): The start time of the WRROC entity. + endTime (Optional[str]): The end time of the WRROC entity. + version (Optional[str]): The version of the WRROC entity. + """ + + id: str + name: str + description: Optional[str] = "" + instrument: Optional[str] = None + object: list[WRROCInputs] + result: list[WRROCOutputs] + startTime: Optional[str] = None + endTime: Optional[str] = None + version: Optional[str] = None + + class Config: + extra = "allow" + + +class WRROCData(WRROCDataBase): + """ + A model representing a WRROC entity, inheriting from WRROCDataBase. + """ + + pass + + +class WRROCDataTES(WRROCDataBase): + """ + A model representing WRROC data specifically for TES conversion. + + This model inherits from WRROCDataBase and includes all the necessary fields required for TES conversion. + """ + + pass + + +class WRROCDataWES(WRROCDataBase): + """ + A model representing WRROC data specifically for WES conversion. + + This model inherits from WRROCDataBase and includes additional fields required for WES conversion. + """ + + status: str + + +class WRROCProcess(BaseModel): + """ + A model representing the WRROC Process Run profile. + + Attributes: + id (str): The unique identifier for the WRROC entity. + name (str): The name of the WRROC entity. + description (Optional[str]): A brief description of the WRROC entity. + startTime (Optional[str]): The start time of the process. + endTime (Optional[str]): The end time of the process. + object (Optional[list[dict[str, str]]]): A list of input objects related to the process. + profiles (Optional[list[AnyUrl]]): URLs to the RO-Crate profiles used. + """ + + id: str + name: str + description: Optional[str] = "" + startTime: Optional[str] = None + endTime: Optional[str] = None + object: Optional[list[dict[str, str]]] = None + profiles: Optional[list[AnyUrl]] = None + + class Config: + extra = "allow" + + +class WRROCWorkflow(WRROCProcess): + """ + A model representing the WRROC Workflow Run profile, inheriting from WRROCProcess. + + Attributes: + workflowType (Optional[str]): The type of the workflow. + workflowVersion (Optional[str]): The version of the workflow. + result (Optional[list[dict[str, str]]]): A list of output results related to the workflow. + hasPart (Optional[list[AnyUrl]]): A list of parts or steps within the workflow. + """ + + workflowType: Optional[str] = None + workflowVersion: Optional[str] = None + result: Optional[list[dict[str, str]]] = None + hasPart: Optional[list[AnyUrl]] = None + + class Config: + extra = "allow" + + +class WRROCProvenance(WRROCWorkflow): + """ + A model representing the WRROC Provenance Run profile, inheriting from WRROCWorkflow. + + Attributes: + provenanceData (Optional[str]): Data related to the provenance of the workflow. + agents (Optional[list[dict[str, str]]]): A list of agents involved in the workflow. + activity (Optional[list[dict[str, str]]]): Activities related to the provenance. + generatedBy (Optional[list[AnyUrl]]): URLs of the entities that generated the data. + used (Optional[list[AnyUrl]]): URLs of the entities that were used in the data generation. + """ + + provenanceData: Optional[str] = None + agents: Optional[list[dict[str, str]]] = None + activity: Optional[list[dict[str, str]]] = None + generatedBy: Optional[list[AnyUrl]] = None + used: Optional[list[AnyUrl]] = None + + class Config: + extra = "allow" diff --git a/crategen/validators.py b/crategen/validators.py index 3d527b7..4cadf0c 100644 --- a/crategen/validators.py +++ b/crategen/validators.py @@ -3,7 +3,13 @@ from pydantic import ValidationError -from .models import WRROCProcess, WRROCProvenance, WRROCWorkflow +from .models.wrroc_models import ( + WRROCProcess, + WRROCProvenance, + WRROCWorkflow, + WRROCDataTES, + WRROCDataWES, +) def validate_wrroc(data: dict) -> Union[WRROCProvenance, WRROCWorkflow, WRROCProcess]: diff --git a/tests/unit/test_wrroc_models.py b/tests/unit/test_wrroc_models.py index 26dec69..ad05a02 100644 --- a/tests/unit/test_wrroc_models.py +++ b/tests/unit/test_wrroc_models.py @@ -1,9 +1,15 @@ import unittest from pydantic import ValidationError -from crategen.models import WRROCProcess, WRROCWorkflow, WRROCProvenance, WRROCDataWES +from crategen.models.wrroc_models import ( + WRROCProcess, + WRROCWorkflow, + WRROCProvenance, + WRROCDataWES, +) from crategen.validators import validate_wrroc, validate_wrroc_tes, validate_wrroc_wes + class TestWRROCProcessModel(unittest.TestCase): """ Unit tests for the WRROCProcess model. @@ -19,7 +25,12 @@ def test_wrroc_process_model(self): "description": "A simple process", "startTime": "2024-07-10T14:30:00Z", "endTime": "2024-07-10T15:30:00Z", - "object": [{"id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md", "name": "Input 1"}] + "object": [ + { + "id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md", + "name": "Input 1", + } + ], } model = WRROCProcess(**data) self.assertEqual(model.id, "process-id") @@ -29,11 +40,7 @@ def test_wrroc_process_empty_object_list(self): """ Test that the WRROCProcess model handles empty object lists correctly. """ - data = { - "id": "process-id", - "name": "Test Process", - "object": [] - } + data = {"id": "process-id", "name": "Test Process", "object": []} model = WRROCProcess(**data) self.assertEqual(model.object, []) @@ -43,11 +50,12 @@ def test_wrroc_process_invalid_data(self): """ data = { "id": 123, # id should be a string - "name": None # name should be a string + "name": None, # name should be a string } with self.assertRaises(ValidationError): WRROCProcess(**data) + class TestWRROCWorkflowModel(unittest.TestCase): """ Unit tests for the WRROCWorkflow model. @@ -62,24 +70,27 @@ def test_wrroc_workflow_model(self): "name": "Test Workflow", "workflowType": "CWL", "workflowVersion": "v1.0", - "result": [{"id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/LICENSE", "name": "Output 1"}] + "result": [ + { + "id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/LICENSE", + "name": "Output 1", + } + ], } model = WRROCWorkflow(**data) self.assertEqual(model.workflowType, "CWL") - self.assertEqual(model.result[0]['name'], "Output 1") + self.assertEqual(model.result[0]["name"], "Output 1") def test_wrroc_workflow_missing_optional_fields(self): """ Test that the WRROCWorkflow model handles missing optional fields correctly. """ - data = { - "id": "workflow-id", - "name": "Test Workflow" - } + data = {"id": "workflow-id", "name": "Test Workflow"} model = WRROCWorkflow(**data) self.assertIsNone(model.workflowType) self.assertIsNone(model.workflowVersion) + class TestWRROCProvenanceModel(unittest.TestCase): """ Unit tests for the WRROCProvenance model. @@ -93,24 +104,21 @@ def test_wrroc_provenance_model(self): "id": "provenance-id", "name": "Test Provenance", "provenanceData": "Provenance information", - "agents": [{"id": "agent1", "name": "Agent 1"}] + "agents": [{"id": "agent1", "name": "Agent 1"}], } model = WRROCProvenance(**data) self.assertEqual(model.provenanceData, "Provenance information") - self.assertEqual(model.agents[0]['name'], "Agent 1") + self.assertEqual(model.agents[0]["name"], "Agent 1") def test_wrroc_provenance_empty_agents_list(self): """ Test that the WRROCProvenance model handles empty agents lists correctly. """ - data = { - "id": "provenance-id", - "name": "Test Provenance", - "agents": [] - } + data = {"id": "provenance-id", "name": "Test Provenance", "agents": []} model = WRROCProvenance(**data) self.assertEqual(model.agents, []) + class TestWRROCValidators(unittest.TestCase): """ Unit tests for the WRROC validators to ensure they work as expected. @@ -120,10 +128,7 @@ def test_validate_wrroc_process(self): """ Test that validate_wrroc correctly identifies a WRROCProcess entity. """ - data = { - "id": "process-id", - "name": "Test Process" - } + data = {"id": "process-id", "name": "Test Process"} model = validate_wrroc(data) self.assertIsInstance(model, WRROCProcess) @@ -135,7 +140,7 @@ def test_validate_wrroc_workflow(self): "id": "workflow-id", "name": "Test Workflow", "workflowType": "CWL", - "workflowVersion": "v1.0" + "workflowVersion": "v1.0", } model = validate_wrroc(data) self.assertIsInstance(model, WRROCWorkflow) @@ -147,7 +152,7 @@ def test_validate_wrroc_provenance(self): data = { "id": "provenance-id", "name": "Test Provenance", - "provenanceData": "Provenance information" + "provenanceData": "Provenance information", } model = validate_wrroc(data) self.assertIsInstance(model, WRROCProvenance) @@ -156,9 +161,7 @@ def test_validate_wrroc_invalid(self): """ Test that validate_wrroc raises a ValueError for invalid WRROC data. """ - data = { - "unknown_field": "unexpected" - } + data = {"unknown_field": "unexpected"} with self.assertRaises(ValueError): validate_wrroc(data) @@ -169,8 +172,18 @@ def test_validate_wrroc_tes(self): data = { "id": "process-id", "name": "Test Process", - "object": [{"id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md", "name": "Input 1"}], - "result": [{"id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/LICENSE", "name": "Output 1"}] + "object": [ + { + "id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md", + "name": "Input 1", + } + ], + "result": [ + { + "id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/LICENSE", + "name": "Output 1", + } + ], } model = validate_wrroc_tes(data) self.assertEqual(model.id, "process-id") @@ -184,7 +197,12 @@ def test_validate_wrroc_tes_empty_object_list(self): "id": "process-id", "name": "Test Process", "object": [], - "result": [{"id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/LICENSE", "name": "Output 1"}] + "result": [ + { + "id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/LICENSE", + "name": "Output 1", + } + ], } model = validate_wrroc_tes(data) self.assertEqual(model.object, []) @@ -193,29 +211,35 @@ def test_validate_wrroc_tes_missing_fields(self): """ Test that validate_wrroc_tes raises a ValueError if required fields for TES conversion are missing. """ - data = { - "id": "process-id", - "name": "Test Process" - } + data = {"id": "process-id", "name": "Test Process"} with self.assertRaises(ValueError): validate_wrroc_tes(data) def test_validate_wrroc_wes(self): - """ - Test that validate_wrroc_wes correctly validates a WRROC entity for WES conversion. - """ - data = { - "id": "workflow-id", - "name": "Test Workflow", - "workflowType": "CWL", - "workflowVersion": "v1.0", - "status": "completed", - "object": [{"id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md", "name": "Input 1"}], - "result": [{"id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/LICENSE", "name": "Output 1"}] - } - model = validate_wrroc_wes(data) - self.assertIsInstance(model, WRROCDataWES) - + """ + Test that validate_wrroc_wes correctly validates a WRROC entity for WES conversion. + """ + data = { + "id": "workflow-id", + "name": "Test Workflow", + "workflowType": "CWL", + "workflowVersion": "v1.0", + "status": "completed", + "object": [ + { + "id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md", + "name": "Input 1", + } + ], + "result": [ + { + "id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/LICENSE", + "name": "Output 1", + } + ], + } + model = validate_wrroc_wes(data) + self.assertIsInstance(model, WRROCDataWES) def test_validate_wrroc_wes_invalid_url(self): """ @@ -234,7 +258,7 @@ def test_validate_wrroc_wes_invalid_url(self): "name": "Test Workflow", "workflowType": "CWL", "workflowVersion": "v1.0", - "result": [{"id": url, "name": "Output 1"}] + "result": [{"id": url, "name": "Output 1"}], } with self.assertRaises(ValueError): validate_wrroc_wes(data) @@ -243,12 +267,10 @@ def test_validate_wrroc_wes_missing_fields(self): """ Test that validate_wrroc_wes raises a ValueError if required fields for WES conversion are missing. """ - data = { - "id": "workflow-id", - "name": "Test Workflow" - } + data = {"id": "workflow-id", "name": "Test Workflow"} with self.assertRaises(ValueError): validate_wrroc_wes(data) + if __name__ == "__main__": unittest.main() From 2aaa5f15b5c9f4eb2c74ed81e328986868d41b0d Mon Sep 17 00:00:00 2001 From: salihuDickson Date: Sat, 24 Aug 2024 23:17:30 +0100 Subject: [PATCH 6/6] lint code --- crategen/converter_manager.py | 1 + crategen/converters/tes_converter.py | 5 +++-- crategen/converters/wes_converter.py | 7 ++++--- crategen/models/wes_models.py | 3 ++- crategen/models/wrroc_models.py | 3 ++- crategen/validators.py | 4 ++-- tests/unit/test_wrroc_models.py | 5 +++-- 7 files changed, 17 insertions(+), 11 deletions(-) diff --git a/crategen/converter_manager.py b/crategen/converter_manager.py index 3a0ef6b..756a565 100644 --- a/crategen/converter_manager.py +++ b/crategen/converter_manager.py @@ -1,6 +1,7 @@ from .converters.tes_converter import TESConverter from .converters.wes_converter import WESConverter + class ConverterManager: def __init__(self): self.tes_converter = TESConverter() diff --git a/crategen/converters/tes_converter.py b/crategen/converters/tes_converter.py index 8c0262a..0d7a0b9 100644 --- a/crategen/converters/tes_converter.py +++ b/crategen/converters/tes_converter.py @@ -1,7 +1,8 @@ -from .abstract_converter import AbstractConverter +from pydantic import ValidationError + from ..models.tes_models import TESData from ..models.wrroc_models import WRROCDataTES -from pydantic import ValidationError +from .abstract_converter import AbstractConverter class TESConverter(AbstractConverter): diff --git a/crategen/converters/wes_converter.py b/crategen/converters/wes_converter.py index 5a5bb5c..619e069 100644 --- a/crategen/converters/wes_converter.py +++ b/crategen/converters/wes_converter.py @@ -1,8 +1,9 @@ -from .abstract_converter import AbstractConverter -from ..utils import convert_to_iso8601 +from pydantic import ValidationError + from ..models.wes_models import WESData from ..models.wrroc_models import WRROCDataWES -from pydantic import ValidationError +from ..utils import convert_to_iso8601 +from .abstract_converter import AbstractConverter class WESConverter(AbstractConverter): diff --git a/crategen/models/wes_models.py b/crategen/models/wes_models.py index 0d5d569..e6e37b7 100644 --- a/crategen/models/wes_models.py +++ b/crategen/models/wes_models.py @@ -1,6 +1,7 @@ -from pydantic import BaseModel, Field, root_validator from typing import Optional +from pydantic import BaseModel, Field, root_validator + class WESRunLog(BaseModel): """ diff --git a/crategen/models/wrroc_models.py b/crategen/models/wrroc_models.py index 669dfde..c01d17a 100644 --- a/crategen/models/wrroc_models.py +++ b/crategen/models/wrroc_models.py @@ -1,6 +1,7 @@ -from pydantic import BaseModel, AnyUrl from typing import Optional +from pydantic import AnyUrl, BaseModel + class WRROCInputs(BaseModel): """ diff --git a/crategen/validators.py b/crategen/validators.py index 4cadf0c..16dfc1c 100644 --- a/crategen/validators.py +++ b/crategen/validators.py @@ -4,11 +4,11 @@ from pydantic import ValidationError from .models.wrroc_models import ( + WRROCDataTES, + WRROCDataWES, WRROCProcess, WRROCProvenance, WRROCWorkflow, - WRROCDataTES, - WRROCDataWES, ) diff --git a/tests/unit/test_wrroc_models.py b/tests/unit/test_wrroc_models.py index ad05a02..1e9538e 100644 --- a/tests/unit/test_wrroc_models.py +++ b/tests/unit/test_wrroc_models.py @@ -1,11 +1,12 @@ import unittest + from pydantic import ValidationError from crategen.models.wrroc_models import ( + WRROCDataWES, WRROCProcess, - WRROCWorkflow, WRROCProvenance, - WRROCDataWES, + WRROCWorkflow, ) from crategen.validators import validate_wrroc, validate_wrroc_tes, validate_wrroc_wes