-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
test(tes,wes): add comprehensive unit tests #17
base: main
Are you sure you want to change the base?
Changes from 4 commits
de59af2
879a84a
ddaa0de
75ee3a5
29f774d
1300eb3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,52 +1,44 @@ | ||
from pydantic import ValidationError | ||
from .abstract_converter import AbstractConverter | ||
from .utils import convert_to_iso8601 | ||
from ..models import TESData, WRROCData | ||
|
||
class TESConverter(AbstractConverter): | ||
|
||
def convert_to_wrroc(self, tes_data): | ||
# Validate and extract data with defaults | ||
id = tes_data.get("id", "") | ||
name = tes_data.get("name", "") | ||
description = tes_data.get("description", "") | ||
executors = tes_data.get("executors", [{}]) | ||
inputs = tes_data.get("inputs", []) | ||
outputs = tes_data.get("outputs", []) | ||
creation_time = tes_data.get("creation_time", "") | ||
end_time = tes_data.get("logs", [{}])[0].get("end_time", "") # Corrected to fetch from logs | ||
try: | ||
validated_tes_data = TESData(**tes_data) | ||
except ValidationError as e: | ||
raise ValueError(f"Invalid TES data: {e}") | ||
|
||
# Convert to WRROC | ||
wrroc_data = { | ||
"@id": id, | ||
"name": name, | ||
"description": description, | ||
"instrument": executors[0].get("image", None) if executors else None, | ||
"object": [{"@id": input.get("url", ""), "name": input.get("path", "")} for input in inputs], | ||
"result": [{"@id": output.get("url", ""), "name": output.get("path", "")} for output in outputs], | ||
"startTime": convert_to_iso8601(creation_time), | ||
"endTime": convert_to_iso8601(end_time), | ||
"@id": validated_tes_data.id, | ||
"name": validated_tes_data.name, | ||
"description": validated_tes_data.description, | ||
"instrument": validated_tes_data.executors[0].image if validated_tes_data.executors else None, | ||
"object": [{"@id": input.url, "name": input.path} for input in validated_tes_data.inputs], | ||
"result": [{"@id": output.url, "name": output.path} for output in validated_tes_data.outputs], | ||
"startTime": convert_to_iso8601(validated_tes_data.creation_time), | ||
"endTime": convert_to_iso8601(validated_tes_data.logs[0].end_time) if validated_tes_data.logs else None, | ||
} | ||
return wrroc_data | ||
|
||
def convert_from_wrroc(self, wrroc_data): | ||
# Validate and extract data with defaults | ||
id = wrroc_data.get("@id", "") | ||
name = wrroc_data.get("name", "") | ||
description = wrroc_data.get("description", "") | ||
instrument = wrroc_data.get("instrument", "") | ||
object_data = wrroc_data.get("object", []) | ||
result_data = wrroc_data.get("result", []) | ||
start_time = wrroc_data.get("startTime", "") | ||
end_time = wrroc_data.get("endTime", "") | ||
try: | ||
# Filter only the fields relevant to WRROCData | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you can remove this comment, the code and variable name are obvious enough. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will remove the redundant comment in the next PR to improve code readability. |
||
wrroc_filtered_data = {key: wrroc_data.get(key) for key in WRROCData.__fields__ if key in wrroc_data} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do you use And why do the converters for TES (filter irrelevant/extra fields) and WES (raise error for extra fields) behave so differently? Maybe there is a good reason for the different behavior that I am missing. In that case, please explain. However, if there is no good reason, please apply a consistent behavior (probably the one used in TES). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will align the behavior of TES and WES converters to ensure consistent handling of irrelevant/extra fields by filtering them out. Additionally, I will standardize the use of |
||
validated_wrroc_data = WRROCData(**wrroc_filtered_data) | ||
except ValidationError as e: | ||
raise ValueError(f"Invalid WRROC data: {e}") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In such cases, it is good practice to chain exceptions, i.e., instead of except Exception as exc:
raise Exception(f"My own text: {exc}") do except Exception as exc:
raise Exception("My own text") from exc Please address this in a future PR for all these cases (but not in this PR). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will address exception chaining in all relevant cases in a future PR to follow best practices as you suggested. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Okay. Please create an issue for that and then close this conversation. |
||
|
||
# Convert from WRROC to TES | ||
tes_data = { | ||
"id": id, | ||
"name": name, | ||
"description": description, | ||
"executors": [{"image": instrument}], | ||
"inputs": [{"url": obj.get("@id", ""), "path": obj.get("name", "")} for obj in object_data], | ||
"outputs": [{"url": res.get("@id", ""), "path": res.get("name", "")} for res in result_data], | ||
"creation_time": start_time, | ||
"logs": [{"end_time": end_time}], # Added to logs | ||
"id": validated_wrroc_data.id, | ||
"name": validated_wrroc_data.name, | ||
"description": validated_wrroc_data.description, | ||
"executors": [{"image": validated_wrroc_data.instrument}], | ||
"inputs": [{"url": obj.id, "path": obj.name} for obj in validated_wrroc_data.object], | ||
"outputs": [{"url": res.id, "path": res.name} for res in validated_wrroc_data.result], | ||
"creation_time": validated_wrroc_data.startTime, | ||
"logs": [{"end_time": validated_wrroc_data.endTime}], | ||
} | ||
return tes_data |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,46 +1,50 @@ | ||
from pydantic import ValidationError | ||
from .abstract_converter import AbstractConverter | ||
from ..models import WESData, WRROCDataWES | ||
from .utils import convert_to_iso8601 | ||
|
||
class WESConverter(AbstractConverter): | ||
|
||
def convert_to_wrroc(self, wes_data): | ||
# Validate and extract data with defaults | ||
run_id = wes_data.get("run_id", "") | ||
name = wes_data.get("run_log", {}).get("name", "") | ||
state = wes_data.get("state", "") | ||
start_time = wes_data.get("run_log", {}).get("start_time", "") | ||
end_time = wes_data.get("run_log", {}).get("end_time", "") | ||
outputs = wes_data.get("outputs", {}) | ||
|
||
# Convert to WRROC | ||
try: | ||
wes_model = WESData(**wes_data) | ||
except ValidationError as e: | ||
raise ValueError(f"Invalid WES data: {e}") | ||
outputs = wes_model.outputs | ||
|
||
wrroc_data = { | ||
"@id": run_id, | ||
"name": name, | ||
"status": state, | ||
"startTime": convert_to_iso8601(start_time), | ||
"endTime": convert_to_iso8601(end_time), | ||
"result": [{"@id": output.get("location", ""), "name": output.get("name", "")} for output in outputs], | ||
"@id": wes_model.run_id, | ||
"name": wes_model.run_log.name, | ||
"status": wes_model.state, | ||
"startTime": convert_to_iso8601(wes_model.run_log.start_time), | ||
"endTime": convert_to_iso8601(wes_model.run_log.end_time), | ||
"result": [{"@id": output.location, "name": output.name} for output in outputs], | ||
} | ||
return wrroc_data | ||
|
||
def convert_from_wrroc(self, wrroc_data): | ||
# Validate and extract data with defaults | ||
run_id = wrroc_data.get("@id", "") | ||
name = wrroc_data.get("name", "") | ||
start_time = wrroc_data.get("startTime", "") | ||
end_time = wrroc_data.get("endTime", "") | ||
state = wrroc_data.get("status", "") | ||
result_data = wrroc_data.get("result", []) | ||
|
||
# Convert from WRROC to WES | ||
allowed_fields = set(WRROCDataWES.__fields__.keys()) | ||
unexpected_fields = set(wrroc_data.keys()) - allowed_fields | ||
|
||
if unexpected_fields: | ||
raise ValueError(f"Unexpected fields in WRROC data: {unexpected_fields}") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't understand why unexpected fields should raise an error? How do they bother you? It is quite likely that any real world (WR)ROC would raise an error here. The behavior in the TES converter makes much more sense: filter out what you don't need and then validate the remainder against what you need. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree. I will update the WES converter to filter out unexpected fields, similar to the TES converter, to handle real-world WRROC data more robustly. |
||
|
||
try: | ||
wrroc_filtered_data = {key: wrroc_data.get(key) for key in WRROCDataWES.__fields__} | ||
wrroc_model = WRROCDataWES(**wrroc_filtered_data) | ||
except ValidationError as e: | ||
raise ValueError(f"Invalid WRROC data: {e}") | ||
|
||
result_data = wrroc_model.result | ||
|
||
wes_data = { | ||
"run_id": run_id, | ||
"run_id": wrroc_model.id, | ||
"run_log": { | ||
"name": name, | ||
"start_time": start_time, | ||
"end_time": end_time, | ||
"name": wrroc_model.name, | ||
"start_time": wrroc_model.startTime, | ||
"end_time": wrroc_model.endTime, | ||
}, | ||
"state": state, | ||
"outputs": [{"location": res.get("@id", ""), "name": res.get("name", "")} for res in result_data], | ||
"state": wrroc_model.status, | ||
"outputs": [{"location": res.id, "name": res.name} for res in result_data], | ||
} | ||
return wes_data |
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When running the tool with WRROC data as input, you probably want to first validate that the input is indeed a valid WRROC entity. For that, you could use a model In a next step, you then want to validate that the WRROC data has all the fields required for the requested conversion. This code is specific to the converter, i.e., you may need different data for the conversion to TES than you need for the conversion to WES. For that you could define models There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will implement an external WRROC validator function to ensure WRROC data validity and avoid code repetition. Additionally, I will define separate models for WRROCDataTES and WRROCDataWES for specific validation in the respective converters. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please double check your
I think the models and the corresponding validators (including unit tests etc.) should be a single PR. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
from pydantic import BaseModel, Field, validator, root_validator | ||
from typing import List, Optional, Dict | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When using Python 3.9 or newer (which I hope you do), please use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will update the type hints to use list and dict instead of List and Dict to align with Python 3.9+ standards. |
||
|
||
class Executor(BaseModel): | ||
image: str | ||
command: List[str] | ||
|
||
class TESInputs(BaseModel): | ||
url: str | ||
path: str | ||
|
||
class TESOutputs(BaseModel): | ||
url: str | ||
path: str | ||
|
||
class TESLogs(BaseModel): | ||
end_time: Optional[str] = None | ||
|
||
class TESData(BaseModel): | ||
id: str | ||
name: str | ||
description: Optional[str] = "" | ||
executors: List[Executor] | ||
inputs: List[TESInputs] | ||
outputs: List[TESOutputs] | ||
creation_time: str | ||
logs: List[TESLogs] | ||
|
||
class Config: | ||
extra = "forbid" | ||
|
||
class WESRunLog(BaseModel): | ||
name: Optional[str] = None | ||
start_time: Optional[str] = None | ||
end_time: Optional[str] = None | ||
cmd: Optional[List[str]] = None | ||
stdout: Optional[str] = None | ||
stderr: Optional[str] = None | ||
exit_code: Optional[int] = None | ||
|
||
class WESOutputs(BaseModel): | ||
location: str | ||
name: str | ||
|
||
class WESRequest(BaseModel): | ||
workflow_params: Dict[str, str] | ||
workflow_type: str | ||
workflow_type_version: str | ||
tags: Optional[Dict[str, str]] = None | ||
|
||
class WESData(BaseModel): | ||
run_id: str | ||
request: WESRequest | ||
state: str | ||
run_log: WESRunLog | ||
task_logs: Optional[List[WESRunLog]] = None | ||
outputs: List[WESOutputs] | ||
|
||
class Config: | ||
extra = "forbid" | ||
|
||
class WRROCInputs(BaseModel): | ||
id: str | ||
name: str | ||
|
||
class WRROCOutputs(BaseModel): | ||
id: str | ||
name: str | ||
|
||
class WRROCData(BaseModel): | ||
id: str | ||
name: str | ||
description: Optional[str] = "" | ||
instrument: Optional[str] = None | ||
object: List[WRROCInputs] | ||
result: List[WRROCOutputs] | ||
startTime: Optional[str] = None | ||
endTime: Optional[str] = None | ||
|
||
class Config: | ||
extra = "forbid" | ||
|
||
class WRROCDataWES(BaseModel): | ||
id: str | ||
name: str | ||
status: str | ||
result: List[WRROCOutputs] | ||
startTime: Optional[str] = None | ||
endTime: Optional[str] = None | ||
|
||
class Config: | ||
extra = "forbid" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wouldn't it be expressive enough to call the arg just
data
in all cases, instead oftes_data
,wes_data
andwrroc_data
? The input "type" is already included in the function names.Also, please add the following in your next PR for ALL methods/functions (should have already been there, I didn't pay attention during review):
See our contributor guide for details.
Please do not add it in this PR, otherwise it will take forever to review and merge it. But make sure it is the very next PR, as it is very important to us to have well documented and typed code.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you for the feedback. I will rename the arguments to data in all cases for consistency. I will also ensure that all methods/functions include type hints and Google-style docstrings in the next PR as per the contributor guide. I understand the importance of well-documented and typed code.