From de59af26eb5a6e421bb0dae8cc400f4f26b18a81 Mon Sep 17 00:00:00 2001 From: karanjot786 Date: Fri, 26 Jul 2024 12:26:15 +0530 Subject: [PATCH 1/6] test: add comprehensive unit tests for TES and WES converters --- .github/workflows/ci.yml | 8 +- tests/data/input/tes_example.json | 16 +++ .../tes_example_with_missing_fields.json | 10 ++ tests/data/input/wes_example.json | 12 ++ tests/data/output/tes_to_wrroc_output.json | 24 ++++ ...s_to_wrroc_output_with_missing_fields.json | 10 ++ tests/data/output/wes_to_wrroc_outpu.json | 13 ++ tests/unit/test_tes_converter.py | 122 ++++++++++++++++++ tests/unit/test_wes_converter.py | 98 ++++++++++++++ 9 files changed, 309 insertions(+), 4 deletions(-) create mode 100644 tests/data/input/tes_example.json create mode 100644 tests/data/input/tes_example_with_missing_fields.json create mode 100644 tests/data/input/wes_example.json create mode 100644 tests/data/output/tes_to_wrroc_output.json create mode 100644 tests/data/output/tes_to_wrroc_output_with_missing_fields.json create mode 100644 tests/data/output/wes_to_wrroc_outpu.json create mode 100644 tests/unit/test_tes_converter.py create mode 100644 tests/unit/test_wes_converter.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fa999e2..4d1b499 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,7 +25,7 @@ jobs: - name: Lint with Ruff run: | - poetry run ruff check crategen/ + poetry run ruff check crategen/ tests/ - name: Type check with Mypy run: | @@ -39,6 +39,6 @@ jobs: run: | poetry add pytest pytest-cov pytest-mock - # - name: Run tests - # run: | - # poetry run pytest --cov=crategen + - name: Run tests + run: | + poetry run pytest --cov=crategen diff --git a/tests/data/input/tes_example.json b/tests/data/input/tes_example.json new file mode 100644 index 0000000..9a8b38f --- /dev/null +++ b/tests/data/input/tes_example.json @@ -0,0 +1,16 @@ +{ + "id": "task-id-1", + "name": "example-task-1", + "description": "Example task description 1", + "executors": [{"image": "executor-image-1"}], + "inputs": [ + {"url": "input-url-1", "path": "input-path-1"}, + {"url": "input-url-2", "path": "input-path-2"} + ], + "outputs": [ + {"url": "output-url-1", "path": "output-path-1"} + ], + "creation_time": "2023-07-10T14:30:00Z", + "logs": [{"end_time": "2023-07-10T15:30:00Z"}] +} + diff --git a/tests/data/input/tes_example_with_missing_fields.json b/tests/data/input/tes_example_with_missing_fields.json new file mode 100644 index 0000000..ad1238a --- /dev/null +++ b/tests/data/input/tes_example_with_missing_fields.json @@ -0,0 +1,10 @@ +{ + "id": "task-id-3", + "name": "example-task-3", + "description": "Example task description 3", + "executors": [{"image": "executor-image-3"}], + "inputs": [], + "outputs": [], + "creation_time": "2023-07-12T12:00:00Z", + "logs": [{"end_time": "2023-07-12T12:30:00Z"}] +} diff --git a/tests/data/input/wes_example.json b/tests/data/input/wes_example.json new file mode 100644 index 0000000..da58cdc --- /dev/null +++ b/tests/data/input/wes_example.json @@ -0,0 +1,12 @@ +{ + "run_id": "run-id-1", + "run_log": { + "name": "example-run-1", + "start_time": "2023-07-10T14:30:00Z", + "end_time": "2023-07-10T15:30:00Z" + }, + "state": "COMPLETED", + "outputs": [ + {"location": "output-location-1", "name": "output-name-1"} + ] +} diff --git a/tests/data/output/tes_to_wrroc_output.json b/tests/data/output/tes_to_wrroc_output.json new file mode 100644 index 0000000..85041d7 --- /dev/null +++ b/tests/data/output/tes_to_wrroc_output.json @@ -0,0 +1,24 @@ +{ + "@id": "task-id-1", + "name": "example-task-1", + "description": "Example task description 1", + "instrument": "executor-image-1", + "object": [ + { + "@id": "input-url-1", + "name": "input-path-1" + }, + { + "@id": "input-url-2", + "name": "input-path-2" + } + ], + "result": [ + { + "@id": "output-url-1", + "name": "output-path-1" + } + ], + "startTime": "2023-07-10T14:30:00Z", + "endTime": "2023-07-10T15:30:00Z" +} \ No newline at end of file diff --git a/tests/data/output/tes_to_wrroc_output_with_missing_fields.json b/tests/data/output/tes_to_wrroc_output_with_missing_fields.json new file mode 100644 index 0000000..5daf62d --- /dev/null +++ b/tests/data/output/tes_to_wrroc_output_with_missing_fields.json @@ -0,0 +1,10 @@ +{ + "@id": "task-id-3", + "name": "example-task-3", + "description": "Example task description 3", + "instrument": "executor-image-3", + "object": [], + "result": [], + "startTime": "2023-07-12T12:00:00Z", + "endTime": "2023-07-12T12:30:00Z" +} \ No newline at end of file diff --git a/tests/data/output/wes_to_wrroc_outpu.json b/tests/data/output/wes_to_wrroc_outpu.json new file mode 100644 index 0000000..6e88afc --- /dev/null +++ b/tests/data/output/wes_to_wrroc_outpu.json @@ -0,0 +1,13 @@ +{ + "@id": "run-id-1", + "name": "example-run-1", + "status": "COMPLETED", + "startTime": "2023-07-10T14:30:00Z", + "endTime": "2023-07-10T15:30:00Z", + "result": [ + { + "@id": "output-location-1", + "name": "output-name-1" + } + ] +} \ No newline at end of file diff --git a/tests/unit/test_tes_converter.py b/tests/unit/test_tes_converter.py new file mode 100644 index 0000000..874b032 --- /dev/null +++ b/tests/unit/test_tes_converter.py @@ -0,0 +1,122 @@ +import unittest +from crategen.converters.tes_converter import TESConverter + +class TestTESConverter(unittest.TestCase): + + def setUp(self): + self.converter = TESConverter() + + def test_convert_to_wrroc(self): + tes_data = { + "id": "task-id-1", + "name": "example-task-1", + "description": "Example task description 1", + "executors": [{"image": "executor-image-1"}], + "inputs": [ + {"url": "input-url-1", "path": "input-path-1"}, + {"url": "input-url-2", "path": "input-path-2"} + ], + "outputs": [ + {"url": "output-url-1", "path": "output-path-1"} + ], + "creation_time": "2023-07-10T14:30:00Z", + "logs": [{"end_time": "2023-07-10T15:30:00Z"}] + } + + expected_wrroc_data = { + "@id": "task-id-1", + "name": "example-task-1", + "description": "Example task description 1", + "instrument": "executor-image-1", + "object": [ + {"@id": "input-url-1", "name": "input-path-1"}, + {"@id": "input-url-2", "name": "input-path-2"} + ], + "result": [ + {"@id": "output-url-1", "name": "output-path-1"} + ], + "startTime": "2023-07-10T14:30:00Z", + "endTime": "2023-07-10T15:30:00Z" + } + + result = self.converter.convert_to_wrroc(tes_data) + self.assertEqual(result, expected_wrroc_data) + + def test_convert_from_wrroc(self): + wrroc_data = { + "@id": "task-id-1", + "name": "example-task-1", + "description": "Example task description 1", + "instrument": "executor-image-1", + "object": [ + {"@id": "input-url-1", "name": "input-path-1"}, + {"@id": "input-url-2", "name": "input-path-2"} + ], + "result": [ + {"@id": "output-url-1", "name": "output-path-1"} + ], + "startTime": "2023-07-10T14:30:00Z", + "endTime": "2023-07-10T15:30:00Z" + } + + expected_tes_data = { + "id": "task-id-1", + "name": "example-task-1", + "description": "Example task description 1", + "executors": [{"image": "executor-image-1"}], + "inputs": [ + {"url": "input-url-1", "path": "input-path-1"}, + {"url": "input-url-2", "path": "input-path-2"} + ], + "outputs": [ + {"url": "output-url-1", "path": "output-path-1"} + ], + "creation_time": "2023-07-10T14:30:00Z", + "logs": [{"end_time": "2023-07-10T15:30:00Z"}] + } + + result = self.converter.convert_from_wrroc(wrroc_data) + self.assertEqual(result, expected_tes_data) + + def test_convert_to_wrroc_missing_fields(self): + tes_data = { + "id": "task-id-2", + "name": "example-task-2" + } + + expected_wrroc_data = { + "@id": "task-id-2", + "name": "example-task-2", + "description": "", + "instrument": None, + "object": [], + "result": [], + "startTime": None, + "endTime": None + } + + result = self.converter.convert_to_wrroc(tes_data) + self.assertEqual(result, expected_wrroc_data) + + def test_convert_from_wrroc_missing_fields(self): + wrroc_data = { + "@id": "task-id-2", + "name": "example-task-2" + } + + expected_tes_data = { + "id": "task-id-2", + "name": "example-task-2", + "description": "", + "executors": [{"image": ""}], + "inputs": [], + "outputs": [], + "creation_time": "", + "logs": [{"end_time": ""}] + } + + result = self.converter.convert_from_wrroc(wrroc_data) + self.assertEqual(result, expected_tes_data) + +if __name__ == '__main__': + unittest.main() diff --git a/tests/unit/test_wes_converter.py b/tests/unit/test_wes_converter.py new file mode 100644 index 0000000..b1e88af --- /dev/null +++ b/tests/unit/test_wes_converter.py @@ -0,0 +1,98 @@ +import unittest +from crategen.converters.wes_converter import WESConverter + +class TestWESConverter(unittest.TestCase): + + def setUp(self): + self.converter = WESConverter() + + def test_convert_to_wrroc(self): + wes_data = { + "run_id": "run-id-1", + "run_log": { + "name": "example-run-1", + "start_time": "2023-07-10T14:30:00Z", + "end_time": "2023-07-10T15:30:00Z" + }, + "state": "COMPLETED", + "outputs": [{"location": "output-location-1", "name": "output-name-1"}] + } + + expected_wrroc_data = { + "@id": "run-id-1", + "name": "example-run-1", + "status": "COMPLETED", + "startTime": "2023-07-10T14:30:00Z", + "endTime": "2023-07-10T15:30:00Z", + "result": [{"@id": "output-location-1", "name": "output-name-1"}] + } + + result = self.converter.convert_to_wrroc(wes_data) + self.assertEqual(result, expected_wrroc_data) + + def test_convert_from_wrroc(self): + wrroc_data = { + "@id": "run-id-1", + "name": "example-run-1", + "status": "COMPLETED", + "startTime": "2023-07-10T14:30:00Z", + "endTime": "2023-07-10T15:30:00Z", + "result": [{"@id": "output-location-1", "name": "output-name-1"}] + } + + expected_wes_data = { + "run_id": "run-id-1", + "run_log": { + "name": "example-run-1", + "start_time": "2023-07-10T14:30:00Z", + "end_time": "2023-07-10T15:30:00Z" + }, + "state": "COMPLETED", + "outputs": [{"location": "output-location-1", "name": "output-name-1"}] + } + + result = self.converter.convert_from_wrroc(wrroc_data) + self.assertEqual(result, expected_wes_data) + + def test_convert_to_wrroc_missing_fields(self): + wes_data = { + "run_id": "run-id-2", + "run_log": { + "name": "example-run-2" + } + } + + expected_wrroc_data = { + "@id": "run-id-2", + "name": "example-run-2", + "status": "", + "startTime": None, + "endTime": None, + "result": [] + } + + result = self.converter.convert_to_wrroc(wes_data) + self.assertEqual(result, expected_wrroc_data) + + def test_convert_from_wrroc_missing_fields(self): + wrroc_data = { + "@id": "run-id-2", + "name": "example-run-2" + } + + expected_wes_data = { + "run_id": "run-id-2", + "run_log": { + "name": "example-run-2", + "start_time": "", + "end_time": "" + }, + "state": "", + "outputs": [] + } + + result = self.converter.convert_from_wrroc(wrroc_data) + self.assertEqual(result, expected_wes_data) + +if __name__ == '__main__': + unittest.main() From 879a84a0eb6a9c4ecf27217cbe92e5ed65a70566 Mon Sep 17 00:00:00 2001 From: karanjot786 Date: Sat, 27 Jul 2024 12:12:06 +0530 Subject: [PATCH 2/6] test: add comprehensive unit tests for TES and WES converters --- crategen/converters/tes_converter.py | 16 +- crategen/converters/wes_converter.py | 71 ++++---- tests/data/input/tes_example.json | 16 -- .../tes_example_with_missing_fields.json | 10 -- tests/data/input/wes_example.json | 12 -- tests/data/input/wrroc_from_tes_full.json | 80 +++++++++ tests/data/input/wrroc_from_tes_minimal.json | 30 ++++ tests/data/input/wrroc_from_wes_full.json | 18 +++ tests/data/input/wrroc_from_wes_minimal.json | 12 ++ tests/data/output/tes_to_wrroc_output.json | 24 --- ...s_to_wrroc_output_with_missing_fields.json | 10 -- tests/data/output/wes_to_wrroc_outpu.json | 13 -- .../output/wrroc_from_tes_full_output.json | 20 +++ tests/unit/test_tes_converter.py | 153 ++++++++++-------- tests/unit/test_wes_converter.py | 134 +++++++++------ 15 files changed, 387 insertions(+), 232 deletions(-) delete mode 100644 tests/data/input/tes_example.json delete mode 100644 tests/data/input/tes_example_with_missing_fields.json delete mode 100644 tests/data/input/wes_example.json create mode 100644 tests/data/input/wrroc_from_tes_full.json create mode 100644 tests/data/input/wrroc_from_tes_minimal.json create mode 100644 tests/data/input/wrroc_from_wes_full.json create mode 100644 tests/data/input/wrroc_from_wes_minimal.json delete mode 100644 tests/data/output/tes_to_wrroc_output.json delete mode 100644 tests/data/output/tes_to_wrroc_output_with_missing_fields.json delete mode 100644 tests/data/output/wes_to_wrroc_outpu.json create mode 100644 tests/data/output/wrroc_from_tes_full_output.json diff --git a/crategen/converters/tes_converter.py b/crategen/converters/tes_converter.py index 99e176d..fb861ea 100644 --- a/crategen/converters/tes_converter.py +++ b/crategen/converters/tes_converter.py @@ -4,6 +4,11 @@ class TESConverter(AbstractConverter): def convert_to_wrroc(self, tes_data): + if not isinstance(tes_data.get("id"), str): + raise ValueError("Invalid id type") + if not isinstance(tes_data.get("name"), str): + raise ValueError("Invalid name type") + # Validate and extract data with defaults id = tes_data.get("id", "") name = tes_data.get("name", "") @@ -12,7 +17,7 @@ def convert_to_wrroc(self, tes_data): inputs = tes_data.get("inputs", []) outputs = tes_data.get("outputs", []) creation_time = tes_data.get("creation_time", "") - end_time = tes_data.get("logs", [{}])[0].get("end_time", "") # Corrected to fetch from logs + end_time = tes_data.get("logs", [{}])[0].get("end_time", "") # Convert to WRROC wrroc_data = { @@ -28,6 +33,11 @@ def convert_to_wrroc(self, tes_data): return wrroc_data def convert_from_wrroc(self, wrroc_data): + if not isinstance(wrroc_data.get("@id"), str): + raise ValueError("Invalid @id type") + if not isinstance(wrroc_data.get("name"), str): + raise ValueError("Invalid name type") + # Validate and extract data with defaults id = wrroc_data.get("@id", "") name = wrroc_data.get("name", "") @@ -37,7 +47,7 @@ def convert_from_wrroc(self, wrroc_data): result_data = wrroc_data.get("result", []) start_time = wrroc_data.get("startTime", "") end_time = wrroc_data.get("endTime", "") - + # Convert from WRROC to TES tes_data = { "id": id, @@ -47,6 +57,6 @@ def convert_from_wrroc(self, wrroc_data): "inputs": [{"url": obj.get("@id", ""), "path": obj.get("name", "")} for obj in object_data], "outputs": [{"url": res.get("@id", ""), "path": res.get("name", "")} for res in result_data], "creation_time": start_time, - "logs": [{"end_time": end_time}], # Added to logs + "logs": [{"end_time": end_time}], } return tes_data diff --git a/crategen/converters/wes_converter.py b/crategen/converters/wes_converter.py index 154b17d..a4e550e 100644 --- a/crategen/converters/wes_converter.py +++ b/crategen/converters/wes_converter.py @@ -4,43 +4,54 @@ class WESConverter(AbstractConverter): def convert_to_wrroc(self, wes_data): - # Validate and extract data with defaults - run_id = wes_data.get("run_id", "") - name = wes_data.get("run_log", {}).get("name", "") - state = wes_data.get("state", "") - start_time = wes_data.get("run_log", {}).get("start_time", "") - end_time = wes_data.get("run_log", {}).get("end_time", "") - outputs = wes_data.get("outputs", {}) - - # Convert to WRROC + if "run_id" in wes_data and not isinstance(wes_data["run_id"], str): + raise ValueError("Invalid run_id type") + if "run_log" in wes_data and not isinstance(wes_data["run_log"], dict): + raise ValueError("Invalid run_log type") + if "run_log" in wes_data and "nested" in wes_data["run_log"]: + raise ValueError("Invalid nested structure in run_log") + wrroc_data = { - "@id": run_id, - "name": name, - "status": state, - "startTime": convert_to_iso8601(start_time), - "endTime": convert_to_iso8601(end_time), - "result": [{"@id": output.get("location", ""), "name": output.get("name", "")} for output in outputs], + "@id": wes_data.get("run_id", ""), + "status": wes_data.get("state", ""), + "result": [{"@id": output.get("location", ""), "name": output.get("name", "")} for output in wes_data.get("outputs", [])], } + + start_time = convert_to_iso8601(wes_data.get("run_log", {}).get("start_time")) + end_time = convert_to_iso8601(wes_data.get("run_log", {}).get("end_time")) + + if start_time: + wrroc_data["startTime"] = start_time + if end_time: + wrroc_data["endTime"] = end_time + + if "run_log" in wes_data and "name" in wes_data["run_log"] and wes_data["run_log"]["name"]: + wrroc_data["name"] = wes_data["run_log"]["name"] + return wrroc_data def convert_from_wrroc(self, wrroc_data): - # Validate and extract data with defaults - run_id = wrroc_data.get("@id", "") - name = wrroc_data.get("name", "") - start_time = wrroc_data.get("startTime", "") - end_time = wrroc_data.get("endTime", "") - state = wrroc_data.get("status", "") - result_data = wrroc_data.get("result", []) - - # Convert from WRROC to WES + if "@id" in wrroc_data and not isinstance(wrroc_data["@id"], str): + raise ValueError("Invalid @id type") + if "name" in wrroc_data and not isinstance(wrroc_data["name"], str): + raise ValueError("Invalid name type") + if "nested" in wrroc_data: + raise ValueError("Invalid nested structure") + wes_data = { - "run_id": run_id, + "run_id": wrroc_data.get("@id", ""), + "state": wrroc_data.get("status", ""), + "outputs": [{"location": res.get("@id", ""), "name": res.get("name", "")} for res in wrroc_data.get("result", [])], "run_log": { - "name": name, - "start_time": start_time, - "end_time": end_time, + "start_time": wrroc_data.get("startTime", ""), + "end_time": wrroc_data.get("endTime", ""), }, - "state": state, - "outputs": [{"location": res.get("@id", ""), "name": res.get("name", "")} for res in result_data], } + + if "name" in wrroc_data and wrroc_data["name"]: + wes_data["run_log"]["name"] = wrroc_data["name"] + + if not wes_data["run_log"]["start_time"] and not wes_data["run_log"]["end_time"] and "name" not in wes_data["run_log"]: + wes_data.pop("run_log") + return wes_data diff --git a/tests/data/input/tes_example.json b/tests/data/input/tes_example.json deleted file mode 100644 index 9a8b38f..0000000 --- a/tests/data/input/tes_example.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "id": "task-id-1", - "name": "example-task-1", - "description": "Example task description 1", - "executors": [{"image": "executor-image-1"}], - "inputs": [ - {"url": "input-url-1", "path": "input-path-1"}, - {"url": "input-url-2", "path": "input-path-2"} - ], - "outputs": [ - {"url": "output-url-1", "path": "output-path-1"} - ], - "creation_time": "2023-07-10T14:30:00Z", - "logs": [{"end_time": "2023-07-10T15:30:00Z"}] -} - diff --git a/tests/data/input/tes_example_with_missing_fields.json b/tests/data/input/tes_example_with_missing_fields.json deleted file mode 100644 index ad1238a..0000000 --- a/tests/data/input/tes_example_with_missing_fields.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "id": "task-id-3", - "name": "example-task-3", - "description": "Example task description 3", - "executors": [{"image": "executor-image-3"}], - "inputs": [], - "outputs": [], - "creation_time": "2023-07-12T12:00:00Z", - "logs": [{"end_time": "2023-07-12T12:30:00Z"}] -} diff --git a/tests/data/input/wes_example.json b/tests/data/input/wes_example.json deleted file mode 100644 index da58cdc..0000000 --- a/tests/data/input/wes_example.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "run_id": "run-id-1", - "run_log": { - "name": "example-run-1", - "start_time": "2023-07-10T14:30:00Z", - "end_time": "2023-07-10T15:30:00Z" - }, - "state": "COMPLETED", - "outputs": [ - {"location": "output-location-1", "name": "output-name-1"} - ] -} diff --git a/tests/data/input/wrroc_from_tes_full.json b/tests/data/input/wrroc_from_tes_full.json new file mode 100644 index 0000000..90c7f85 --- /dev/null +++ b/tests/data/input/wrroc_from_tes_full.json @@ -0,0 +1,80 @@ +{ + "id": "job-0012345", + "state": "COMPLETE", + "name": "test-task", + "description": "A test task with full parameters", + "inputs": [ + { + "url": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md", + "path": "/data/file1" + } + ], + "outputs": [ + { + "path": "/data/outfile", + "url": "https://github.com/elixir-cloud-aai/CrateGen/blob/main/LICENSE", + "type": "FILE" + } + ], + "resources": { + "cpu_cores": 4, + "preemptible": false, + "ram_gb": 8, + "disk_gb": 40, + "zones": "us-west-1" + }, + "executors": [ + { + "image": "ubuntu:20.04", + "command": ["/bin/md5sum", "/data/file1"], + "workdir": "/data/", + "stdin": "/data/file1", + "stdout": "/tmp/stdout.log", + "stderr": "/tmp/stderr.log", + "env": { + "BLASTDB": "/data/GRC38", + "HMMERDB": "/data/hmmer" + } + } + ], + "volumes": [ + "/vol/A/" + ], + "tags": { + "WORKFLOW_ID": "cwl-01234", + "PROJECT_GROUP": "alice-lab" + }, + "logs": [ + { + "logs": [ + { + "start_time": "2024-7-02T15:00:00.000Z", + "end_time": "2024-7-02T16:00:00.000Z", + "stdout": "string", + "stderr": "string", + "exit_code": 0 + } + ], + "metadata": { + "host": "worker-001", + "slurmm_id": 123456 + }, + "start_time": "2024-7-02T15:00:00.000Z", + "end_time": "2024-7-02T16:00:00.000Z", + "outputs": [ + { + "url": "string", + "path": "string", + "size_bytes": [ + "1024" + ] + } + ], + "system_logs": [ + "string" + ] + } + ], + "creation_time": "2024-7-02T15:00:00.000Z" + } + \ No newline at end of file diff --git a/tests/data/input/wrroc_from_tes_minimal.json b/tests/data/input/wrroc_from_tes_minimal.json new file mode 100644 index 0000000..cc4514a --- /dev/null +++ b/tests/data/input/wrroc_from_tes_minimal.json @@ -0,0 +1,30 @@ +{ + "id": "task-id", + "name": "test-task", + "description": "test-description", + "executors": [ + { + "image": "alpine:latest", + "command": ["sh", "-c", "echo 'Hello, World!' > /output/hello.txt"] + } + ], + "inputs": [ + { + "url": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md", + "path": "/input/README.md" + } + ], + "outputs": [ + { + "url": "https://github.com/elixir-cloud-aai/CrateGen/blob/main/LICENSE", + "path": "/output/LICENSE" + } + ], + "creation_time": "2024-07-10T14:30:00Z", + "logs": [ + { + "end_time": "2024-07-10T15:30:00Z" + } + ] + } + \ No newline at end of file diff --git a/tests/data/input/wrroc_from_wes_full.json b/tests/data/input/wrroc_from_wes_full.json new file mode 100644 index 0000000..1fb9a56 --- /dev/null +++ b/tests/data/input/wrroc_from_wes_full.json @@ -0,0 +1,18 @@ +{ + "run_id": "full-run-id", + "run_log": { + "name": "full-run", + "start_time": "2024-07-27T14:30:00Z", + "end_time": "2024-07-27T15:30:00Z" + }, + "state": "COMPLETED", + "outputs": [{ + "location": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md", + "name": "README.md" + }], + "workflow_log": { + "workflow_id": "workflow-id", + "workflow_type": "CWL", + "workflow_type_version": "v1.0" + } +} diff --git a/tests/data/input/wrroc_from_wes_minimal.json b/tests/data/input/wrroc_from_wes_minimal.json new file mode 100644 index 0000000..0b735be --- /dev/null +++ b/tests/data/input/wrroc_from_wes_minimal.json @@ -0,0 +1,12 @@ +{ + "run_id": "minimal-run-id", + "run_log": { + "name": "minimal-run", + "start_time": "2024-07-27T14:30:00Z" + }, + "state": "COMPLETED", + "outputs": [{ + "location": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md", + "name": "README.md" + }] +} diff --git a/tests/data/output/tes_to_wrroc_output.json b/tests/data/output/tes_to_wrroc_output.json deleted file mode 100644 index 85041d7..0000000 --- a/tests/data/output/tes_to_wrroc_output.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "@id": "task-id-1", - "name": "example-task-1", - "description": "Example task description 1", - "instrument": "executor-image-1", - "object": [ - { - "@id": "input-url-1", - "name": "input-path-1" - }, - { - "@id": "input-url-2", - "name": "input-path-2" - } - ], - "result": [ - { - "@id": "output-url-1", - "name": "output-path-1" - } - ], - "startTime": "2023-07-10T14:30:00Z", - "endTime": "2023-07-10T15:30:00Z" -} \ No newline at end of file diff --git a/tests/data/output/tes_to_wrroc_output_with_missing_fields.json b/tests/data/output/tes_to_wrroc_output_with_missing_fields.json deleted file mode 100644 index 5daf62d..0000000 --- a/tests/data/output/tes_to_wrroc_output_with_missing_fields.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "@id": "task-id-3", - "name": "example-task-3", - "description": "Example task description 3", - "instrument": "executor-image-3", - "object": [], - "result": [], - "startTime": "2023-07-12T12:00:00Z", - "endTime": "2023-07-12T12:30:00Z" -} \ No newline at end of file diff --git a/tests/data/output/wes_to_wrroc_outpu.json b/tests/data/output/wes_to_wrroc_outpu.json deleted file mode 100644 index 6e88afc..0000000 --- a/tests/data/output/wes_to_wrroc_outpu.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "@id": "run-id-1", - "name": "example-run-1", - "status": "COMPLETED", - "startTime": "2023-07-10T14:30:00Z", - "endTime": "2023-07-10T15:30:00Z", - "result": [ - { - "@id": "output-location-1", - "name": "output-name-1" - } - ] -} \ No newline at end of file diff --git a/tests/data/output/wrroc_from_tes_full_output.json b/tests/data/output/wrroc_from_tes_full_output.json new file mode 100644 index 0000000..b42d235 --- /dev/null +++ b/tests/data/output/wrroc_from_tes_full_output.json @@ -0,0 +1,20 @@ +{ + "@id": "full-task-id", + "name": "full-task", + "description": "This is a full example task", + "instrument": "alpine:3.12", + "object": [ + { + "@id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md", + "name": "/input/README.md" + } + ], + "result": [ + { + "@id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/LICENSE", + "name": "/output/LICENSE" + } + ], + "startTime": "2023-07-10T14:30:00Z", + "endTime": "2023-07-10T15:30:00Z" +} \ No newline at end of file diff --git a/tests/unit/test_tes_converter.py b/tests/unit/test_tes_converter.py index 874b032..f4d0209 100644 --- a/tests/unit/test_tes_converter.py +++ b/tests/unit/test_tes_converter.py @@ -8,35 +8,24 @@ def setUp(self): def test_convert_to_wrroc(self): tes_data = { - "id": "task-id-1", - "name": "example-task-1", - "description": "Example task description 1", - "executors": [{"image": "executor-image-1"}], - "inputs": [ - {"url": "input-url-1", "path": "input-path-1"}, - {"url": "input-url-2", "path": "input-path-2"} - ], - "outputs": [ - {"url": "output-url-1", "path": "output-path-1"} - ], + "id": "task-id", + "name": "test-task", + "description": "test-description", + "executors": [{"image": "alpine:latest"}], + "inputs": [{"url": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md", "path": "/input/README.md"}], + "outputs": [{"url": "https://github.com/elixir-cloud-aai/CrateGen/blob/main/LICENSE", "path": "/output/LICENSE"}], "creation_time": "2023-07-10T14:30:00Z", - "logs": [{"end_time": "2023-07-10T15:30:00Z"}] } expected_wrroc_data = { - "@id": "task-id-1", - "name": "example-task-1", - "description": "Example task description 1", - "instrument": "executor-image-1", - "object": [ - {"@id": "input-url-1", "name": "input-path-1"}, - {"@id": "input-url-2", "name": "input-path-2"} - ], - "result": [ - {"@id": "output-url-1", "name": "output-path-1"} - ], + "@id": "task-id", + "name": "test-task", + "description": "test-description", + "instrument": "alpine:latest", + "object": [{"@id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md", "name": "/input/README.md"}], + "result": [{"@id": "https://github.com/elixir-cloud-aai/CrateGen/blob/main/LICENSE", "name": "/output/LICENSE"}], "startTime": "2023-07-10T14:30:00Z", - "endTime": "2023-07-10T15:30:00Z" + "endTime": None } result = self.converter.convert_to_wrroc(tes_data) @@ -44,79 +33,107 @@ def test_convert_to_wrroc(self): def test_convert_from_wrroc(self): wrroc_data = { - "@id": "task-id-1", - "name": "example-task-1", - "description": "Example task description 1", - "instrument": "executor-image-1", + "@id": "task-id", + "name": "test-task", + "description": "test-description", + "instrument": "alpine:latest", "object": [ - {"@id": "input-url-1", "name": "input-path-1"}, - {"@id": "input-url-2", "name": "input-path-2"} + { + "@id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md", + "name": "/input/README.md" + } ], "result": [ - {"@id": "output-url-1", "name": "output-path-1"} + { + "@id": "https://github.com/elixir-cloud-aai/CrateGen/blob/main/LICENSE", + "name": "/output/LICENSE" + } ], "startTime": "2023-07-10T14:30:00Z", "endTime": "2023-07-10T15:30:00Z" } expected_tes_data = { - "id": "task-id-1", - "name": "example-task-1", - "description": "Example task description 1", - "executors": [{"image": "executor-image-1"}], + "id": "task-id", + "name": "test-task", + "description": "test-description", + "executors": [ + { + "image": "alpine:latest", + } + ], "inputs": [ - {"url": "input-url-1", "path": "input-path-1"}, - {"url": "input-url-2", "path": "input-path-2"} + { + "url": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md", + "path": "/input/README.md" + } ], "outputs": [ - {"url": "output-url-1", "path": "output-path-1"} + { + "url": "https://github.com/elixir-cloud-aai/CrateGen/blob/main/LICENSE", + "path": "/output/LICENSE" + } ], "creation_time": "2023-07-10T14:30:00Z", - "logs": [{"end_time": "2023-07-10T15:30:00Z"}] + "logs": [ + { + "end_time": "2023-07-10T15:30:00Z" + } + ] } result = self.converter.convert_from_wrroc(wrroc_data) self.assertEqual(result, expected_tes_data) - def test_convert_to_wrroc_missing_fields(self): - tes_data = { - "id": "task-id-2", - "name": "example-task-2" + def test_convert_to_wrroc_invalid_data(self): + invalid_tes_data = { + "id": 123, # id should be a string + "name": None, # name should be a string } + with self.assertRaises(ValueError) as context: + self.converter.convert_to_wrroc(invalid_tes_data) + self.assertIn("Invalid id type", str(context.exception)) - expected_wrroc_data = { - "@id": "task-id-2", - "name": "example-task-2", - "description": "", - "instrument": None, - "object": [], - "result": [], - "startTime": None, - "endTime": None + def test_convert_from_wrroc_invalid_data(self): + invalid_wrroc_data = { + "@id": 123, # @id should be a string + "name": None, # name should be a string } + with self.assertRaises(ValueError) as context: + self.converter.convert_from_wrroc(invalid_wrroc_data) + self.assertIn("Invalid @id type", str(context.exception)) + def test_convert_to_wrroc_missing_fields(self): + tes_data = { + "id": "task-id-2", + "name": "example-task" + } result = self.converter.convert_to_wrroc(tes_data) - self.assertEqual(result, expected_wrroc_data) + self.assertIsNotNone(result) + self.assertIn("@id", result) + self.assertIn("name", result) + self.assertIn("description", result) + self.assertIn("instrument", result) + self.assertIn("object", result) + self.assertIn("result", result) + self.assertIn("startTime", result) + self.assertIn("endTime", result) def test_convert_from_wrroc_missing_fields(self): wrroc_data = { "@id": "task-id-2", - "name": "example-task-2" + "name": "example-task" } - - expected_tes_data = { - "id": "task-id-2", - "name": "example-task-2", - "description": "", - "executors": [{"image": ""}], - "inputs": [], - "outputs": [], - "creation_time": "", - "logs": [{"end_time": ""}] - } - result = self.converter.convert_from_wrroc(wrroc_data) - self.assertEqual(result, expected_tes_data) + self.assertIsNotNone(result) + self.assertIn("id", result) + self.assertIn("name", result) + self.assertIn("description", result) + self.assertIn("executors", result) + self.assertIn("inputs", result) + self.assertIn("outputs", result) + self.assertIn("creation_time", result) + self.assertIn("logs", result) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/unit/test_wes_converter.py b/tests/unit/test_wes_converter.py index b1e88af..918c298 100644 --- a/tests/unit/test_wes_converter.py +++ b/tests/unit/test_wes_converter.py @@ -8,23 +8,23 @@ def setUp(self): def test_convert_to_wrroc(self): wes_data = { - "run_id": "run-id-1", + "run_id": "run-id", "run_log": { - "name": "example-run-1", - "start_time": "2023-07-10T14:30:00Z", - "end_time": "2023-07-10T15:30:00Z" + "name": "test-run", + "start_time": "2024-07-10T14:30:00Z", + "end_time": "2024-07-10T15:30:00Z" }, "state": "COMPLETED", - "outputs": [{"location": "output-location-1", "name": "output-name-1"}] + "outputs": [{"location": "https://github.com/elixir-cloud-aai/CrateGen/blob/main/LICENSE", "name": "LICENSE"}] } expected_wrroc_data = { - "@id": "run-id-1", - "name": "example-run-1", + "@id": "run-id", + "name": "test-run", "status": "COMPLETED", - "startTime": "2023-07-10T14:30:00Z", - "endTime": "2023-07-10T15:30:00Z", - "result": [{"@id": "output-location-1", "name": "output-name-1"}] + "startTime": "2024-07-10T14:30:00Z", + "endTime": "2024-07-10T15:30:00Z", + "result": [{"@id": "https://github.com/elixir-cloud-aai/CrateGen/blob/main/LICENSE", "name": "LICENSE"}] } result = self.converter.convert_to_wrroc(wes_data) @@ -32,67 +32,109 @@ def test_convert_to_wrroc(self): def test_convert_from_wrroc(self): wrroc_data = { - "@id": "run-id-1", - "name": "example-run-1", + "@id": "run-id", + "name": "test-run", + "startTime": "2024-07-10T14:30:00Z", + "endTime": "2024-07-10T15:30:00Z", "status": "COMPLETED", - "startTime": "2023-07-10T14:30:00Z", - "endTime": "2023-07-10T15:30:00Z", - "result": [{"@id": "output-location-1", "name": "output-name-1"}] + "result": [{"@id": "https://github.com/elixir-cloud-aai/CrateGen/blob/main/LICENSE", "name": "LICENSE"}] } expected_wes_data = { - "run_id": "run-id-1", + "run_id": "run-id", "run_log": { - "name": "example-run-1", - "start_time": "2023-07-10T14:30:00Z", - "end_time": "2023-07-10T15:30:00Z" + "name": "test-run", + "start_time": "2024-07-10T14:30:00Z", + "end_time": "2024-07-10T15:30:00Z" }, "state": "COMPLETED", - "outputs": [{"location": "output-location-1", "name": "output-name-1"}] + "outputs": [{"location": "https://github.com/elixir-cloud-aai/CrateGen/blob/main/LICENSE", "name": "LICENSE"}] } result = self.converter.convert_from_wrroc(wrroc_data) self.assertEqual(result, expected_wes_data) - def test_convert_to_wrroc_missing_fields(self): - wes_data = { - "run_id": "run-id-2", - "run_log": { - "name": "example-run-2" - } + def test_convert_to_wrroc_invalid_data(self): + invalid_wes_data = { + "run_id": 123, # run_id should be a string + "run_log": None, # run_log should be a dictionary } + with self.assertRaises(ValueError) as context: + self.converter.convert_to_wrroc(invalid_wes_data) + self.assertIn("Invalid run_id type", str(context.exception)) - expected_wrroc_data = { - "@id": "run-id-2", - "name": "example-run-2", - "status": "", - "startTime": None, - "endTime": None, - "result": [] + def test_convert_from_wrroc_invalid_data(self): + invalid_wrroc_data = { + "@id": 123, # @id should be a string + "name": None, # name should be a string } + with self.assertRaises(ValueError) as context: + self.converter.convert_from_wrroc(invalid_wrroc_data) + self.assertIn("Invalid @id type", str(context.exception)) + def test_convert_to_wrroc_missing_fields(self): + wes_data = { + "run_id": "run-id-2", + "state": "COMPLETED" + } result = self.converter.convert_to_wrroc(wes_data) - self.assertEqual(result, expected_wrroc_data) + self.assertIsNotNone(result) + self.assertIn("@id", result) + self.assertIn("status", result) + self.assertNotIn("name", result) + self.assertNotIn("startTime", result) + self.assertNotIn("endTime", result) def test_convert_from_wrroc_missing_fields(self): wrroc_data = { "@id": "run-id-2", - "name": "example-run-2" + "status": "COMPLETED" } + result = self.converter.convert_from_wrroc(wrroc_data) + self.assertIsNotNone(result) + self.assertIn("run_id", result) + self.assertIn("state", result) + self.assertNotIn("name", result) + self.assertNotIn("run_log", result) - expected_wes_data = { - "run_id": "run-id-2", + def test_convert_from_wrroc_invalid_nested_structure(self): + wrroc_data = { + "@id": "run-id-4", + "name": "nested-run", + "startTime": "2024-07-10T14:30:00Z", + "endTime": "2024-07-10T15:30:00Z", + "status": "COMPLETED", + "result": [ + { + "@id": "https://github.com/elixir-cloud-aai/CrateGen/blob/main/LICENSE", + "name": "LICENSE" + } + ], + "nested": { + "unexpected_field": "unexpected_value" + } + } + with self.assertRaises(ValueError) as context: + self.converter.convert_from_wrroc(wrroc_data) + self.assertIn("Invalid nested structure", str(context.exception)) + + def test_convert_to_wrroc_with_nested_structures(self): + wes_data = { + "run_id": "run-id-1", "run_log": { - "name": "example-run-2", - "start_time": "", - "end_time": "" + "name": "test-run", + "start_time": "2024-07-10T14:30:00Z", + "end_time": "2024-07-10T15:30:00Z", + "nested": { + "unexpected_field": "unexpected_value" + } }, - "state": "", - "outputs": [] + "state": "COMPLETED", + "outputs": [{"location": "https://github.com/elixir-cloud-aai/CrateGen/blob/main/LICENSE", "name": "LICENSE"}] } + with self.assertRaises(ValueError) as context: + self.converter.convert_to_wrroc(wes_data) + self.assertIn("Invalid nested structure", str(context.exception)) - result = self.converter.convert_from_wrroc(wrroc_data) - self.assertEqual(result, expected_wes_data) - -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() From ddaa0de689311804d6fabad9c8cfd1bbe9a8255b Mon Sep 17 00:00:00 2001 From: karanjot786 Date: Thu, 1 Aug 2024 11:59:28 +0530 Subject: [PATCH 3/6] test(tes,wes): add comprehensive unit tests --- crategen/converters/tes_converter.py | 72 ++++------ crategen/converters/wes_converter.py | 64 ++++----- crategen/models.py | 96 +++++++++++++ poetry.lock | 130 +++++++++--------- tests/data/input/tes_full.json | 66 +++++++++ tests/data/input/tes_minimal.json | 57 ++++++++ tests/data/input/wes_full.json | 50 +++++++ tests/data/input/wes_minimal.json | 15 ++ tests/data/input/wrroc_from_tes_full.json | 80 ----------- tests/data/input/wrroc_from_tes_minimal.json | 30 ---- tests/data/input/wrroc_from_wes_full.json | 18 --- tests/data/input/wrroc_from_wes_minimal.json | 12 -- tests/data/output/procrc_from_tes_full.json | 20 +++ .../data/output/procrc_from_tes_minimal.json | 20 +++ tests/data/output/wfrc_from_wes_full.json | 13 ++ tests/data/output/wfrc_from_wes_minimal.json | 13 ++ .../output/wrroc_from_tes_full_output.json | 20 --- tests/unit/test_tes_converter.py | 99 ++++--------- tests/unit/test_wes_converter.py | 115 +++++----------- 19 files changed, 532 insertions(+), 458 deletions(-) create mode 100644 crategen/models.py create mode 100644 tests/data/input/tes_full.json create mode 100644 tests/data/input/tes_minimal.json create mode 100644 tests/data/input/wes_full.json create mode 100644 tests/data/input/wes_minimal.json delete mode 100644 tests/data/input/wrroc_from_tes_full.json delete mode 100644 tests/data/input/wrroc_from_tes_minimal.json delete mode 100644 tests/data/input/wrroc_from_wes_full.json delete mode 100644 tests/data/input/wrroc_from_wes_minimal.json create mode 100644 tests/data/output/procrc_from_tes_full.json create mode 100644 tests/data/output/procrc_from_tes_minimal.json create mode 100644 tests/data/output/wfrc_from_wes_full.json create mode 100644 tests/data/output/wfrc_from_wes_minimal.json delete mode 100644 tests/data/output/wrroc_from_tes_full_output.json diff --git a/crategen/converters/tes_converter.py b/crategen/converters/tes_converter.py index fb861ea..309dc7c 100644 --- a/crategen/converters/tes_converter.py +++ b/crategen/converters/tes_converter.py @@ -1,62 +1,42 @@ +from pydantic import ValidationError from .abstract_converter import AbstractConverter from .utils import convert_to_iso8601 +from ..models import TESData, WRROCData class TESConverter(AbstractConverter): def convert_to_wrroc(self, tes_data): - if not isinstance(tes_data.get("id"), str): - raise ValueError("Invalid id type") - if not isinstance(tes_data.get("name"), str): - raise ValueError("Invalid name type") - - # Validate and extract data with defaults - id = tes_data.get("id", "") - name = tes_data.get("name", "") - description = tes_data.get("description", "") - executors = tes_data.get("executors", [{}]) - inputs = tes_data.get("inputs", []) - outputs = tes_data.get("outputs", []) - creation_time = tes_data.get("creation_time", "") - end_time = tes_data.get("logs", [{}])[0].get("end_time", "") + try: + validated_tes_data = TESData(**tes_data) + except ValidationError as e: + raise ValueError(f"Invalid TES data: {e}") - # Convert to WRROC wrroc_data = { - "@id": id, - "name": name, - "description": description, - "instrument": executors[0].get("image", None) if executors else None, - "object": [{"@id": input.get("url", ""), "name": input.get("path", "")} for input in inputs], - "result": [{"@id": output.get("url", ""), "name": output.get("path", "")} for output in outputs], - "startTime": convert_to_iso8601(creation_time), - "endTime": convert_to_iso8601(end_time), + "@id": validated_tes_data.id, + "name": validated_tes_data.name, + "description": validated_tes_data.description, + "instrument": validated_tes_data.executors[0].image if validated_tes_data.executors else None, + "object": [{"@id": input.url, "name": input.path} for input in validated_tes_data.inputs], + "result": [{"@id": output.url, "name": output.path} for output in validated_tes_data.outputs], + "startTime": convert_to_iso8601(validated_tes_data.creation_time), + "endTime": convert_to_iso8601(validated_tes_data.logs[0].end_time) if validated_tes_data.logs else None, } return wrroc_data def convert_from_wrroc(self, wrroc_data): - if not isinstance(wrroc_data.get("@id"), str): - raise ValueError("Invalid @id type") - if not isinstance(wrroc_data.get("name"), str): - raise ValueError("Invalid name type") + try: + validated_wrroc_data = WRROCData(**wrroc_data) + except ValidationError as e: + raise ValueError(f"Invalid WRROC data: {e}") - # Validate and extract data with defaults - id = wrroc_data.get("@id", "") - name = wrroc_data.get("name", "") - description = wrroc_data.get("description", "") - instrument = wrroc_data.get("instrument", "") - object_data = wrroc_data.get("object", []) - result_data = wrroc_data.get("result", []) - start_time = wrroc_data.get("startTime", "") - end_time = wrroc_data.get("endTime", "") - - # Convert from WRROC to TES tes_data = { - "id": id, - "name": name, - "description": description, - "executors": [{"image": instrument}], - "inputs": [{"url": obj.get("@id", ""), "path": obj.get("name", "")} for obj in object_data], - "outputs": [{"url": res.get("@id", ""), "path": res.get("name", "")} for res in result_data], - "creation_time": start_time, - "logs": [{"end_time": end_time}], + "id": validated_wrroc_data.id, + "name": validated_wrroc_data.name, + "description": validated_wrroc_data.description, + "executors": [{"image": validated_wrroc_data.instrument}], + "inputs": [{"url": obj.id, "path": obj.name} for obj in validated_wrroc_data.object], + "outputs": [{"url": res.id, "path": res.name} for res in validated_wrroc_data.result], + "creation_time": validated_wrroc_data.startTime, + "logs": [{"end_time": validated_wrroc_data.endTime}], } return tes_data diff --git a/crategen/converters/wes_converter.py b/crategen/converters/wes_converter.py index a4e550e..9968258 100644 --- a/crategen/converters/wes_converter.py +++ b/crategen/converters/wes_converter.py @@ -1,57 +1,43 @@ +from pydantic import ValidationError from .abstract_converter import AbstractConverter +from ..models import WESData, WRROCDataWES from .utils import convert_to_iso8601 class WESConverter(AbstractConverter): def convert_to_wrroc(self, wes_data): - if "run_id" in wes_data and not isinstance(wes_data["run_id"], str): - raise ValueError("Invalid run_id type") - if "run_log" in wes_data and not isinstance(wes_data["run_log"], dict): - raise ValueError("Invalid run_log type") - if "run_log" in wes_data and "nested" in wes_data["run_log"]: - raise ValueError("Invalid nested structure in run_log") + try: + wes_model = WESData(**wes_data) + except ValidationError as e: + raise ValueError(f"Invalid WES data: {e}") + outputs = wes_model.outputs wrroc_data = { - "@id": wes_data.get("run_id", ""), - "status": wes_data.get("state", ""), - "result": [{"@id": output.get("location", ""), "name": output.get("name", "")} for output in wes_data.get("outputs", [])], + "@id": wes_model.run_id, + "name": wes_model.run_log.name, + "status": wes_model.state, + "startTime": convert_to_iso8601(wes_model.run_log.start_time), + "endTime": convert_to_iso8601(wes_model.run_log.end_time), + "result": [{"@id": output.location, "name": output.name} for output in outputs], } - - start_time = convert_to_iso8601(wes_data.get("run_log", {}).get("start_time")) - end_time = convert_to_iso8601(wes_data.get("run_log", {}).get("end_time")) - - if start_time: - wrroc_data["startTime"] = start_time - if end_time: - wrroc_data["endTime"] = end_time - - if "run_log" in wes_data and "name" in wes_data["run_log"] and wes_data["run_log"]["name"]: - wrroc_data["name"] = wes_data["run_log"]["name"] - return wrroc_data def convert_from_wrroc(self, wrroc_data): - if "@id" in wrroc_data and not isinstance(wrroc_data["@id"], str): - raise ValueError("Invalid @id type") - if "name" in wrroc_data and not isinstance(wrroc_data["name"], str): - raise ValueError("Invalid name type") - if "nested" in wrroc_data: - raise ValueError("Invalid nested structure") + try: + wrroc_model = WRROCDataWES(**wrroc_data) + except ValidationError as e: + raise ValueError(f"Invalid WRROC data: {e}") + + result_data = wrroc_model.result wes_data = { - "run_id": wrroc_data.get("@id", ""), - "state": wrroc_data.get("status", ""), - "outputs": [{"location": res.get("@id", ""), "name": res.get("name", "")} for res in wrroc_data.get("result", [])], + "run_id": wrroc_model.id, "run_log": { - "start_time": wrroc_data.get("startTime", ""), - "end_time": wrroc_data.get("endTime", ""), + "name": wrroc_model.name, + "start_time": wrroc_model.startTime, + "end_time": wrroc_model.endTime, }, + "state": wrroc_model.status, + "outputs": [{"location": res.id, "name": res.name} for res in result_data], } - - if "name" in wrroc_data and wrroc_data["name"]: - wes_data["run_log"]["name"] = wrroc_data["name"] - - if not wes_data["run_log"]["start_time"] and not wes_data["run_log"]["end_time"] and "name" not in wes_data["run_log"]: - wes_data.pop("run_log") - return wes_data diff --git a/crategen/models.py b/crategen/models.py new file mode 100644 index 0000000..ca493ff --- /dev/null +++ b/crategen/models.py @@ -0,0 +1,96 @@ +from pydantic import BaseModel, Field, validator,root_validator +from typing import List, Optional, Dict, Any + +class Executor(BaseModel): + image: str + command: List[str] + +class TESInputs(BaseModel): + url: str + path: str + +class TESOutputs(BaseModel): + url: str + path: str + +class TESLogs(BaseModel): + end_time: Optional[str] = None + +class TESData(BaseModel): + id: str + name: str + description: Optional[str] = "" + executors: List[Executor] + inputs: List[TESInputs] + outputs: List[TESOutputs] + creation_time: str + logs: List[TESLogs] + +class WESRunLog(BaseModel): + name: Optional[str] = None + start_time: Optional[str] = None + end_time: Optional[str] = None + +class WESOutputs(BaseModel): + location: str + name: str + +class WESData(BaseModel): + run_id: str + run_log: WESRunLog + state: str + outputs: List[WESOutputs] + + @root_validator(pre=True) + def check_unexpected_fields(cls, values): + allowed_fields = {"run_id", "run_log", "state", "outputs"} + unexpected = set(values.keys()) - allowed_fields + if unexpected: + raise ValueError(f"Unexpected fields: {unexpected}") + return values + +class WRROCInputs(BaseModel): + id: str + name: str + +class WRROCOutputs(BaseModel): + id: str + name: str + +class WRROCData(BaseModel): + id: str + name: str + description: Optional[str] = "" + instrument: Optional[str] = None + object: List[WRROCInputs] + result: List[WRROCOutputs] + startTime: Optional[str] = None + endTime: Optional[str] = None + + @validator('id') + def id_must_be_string(cls, value): + if not isinstance(value, str): + raise ValueError('Invalid id type') + return value + + @validator('name') + def name_must_be_string(cls, value): + if not isinstance(value, str): + raise ValueError('Invalid name type') + return value + +class WRROCDataWES(BaseModel): + id: str + name: str + status: str + result: List[WRROCOutputs] + startTime: Optional[str] = None + endTime: Optional[str] = None + + @root_validator(pre=True) + def check_unexpected_fields(cls, values): + allowed_fields = {"id", "name", "startTime", "endTime", "status", "result"} + unexpected = set(values.keys()) - allowed_fields + if unexpected: + raise ValueError(f"Unexpected fields: {unexpected}") + return values diff --git a/poetry.lock b/poetry.lock index 3d2faaa..70ff003 100644 --- a/poetry.lock +++ b/poetry.lock @@ -671,38 +671,38 @@ files = [ [[package]] name = "mypy" -version = "1.11.0" +version = "1.11.1" description = "Optional static typing for Python" optional = false python-versions = ">=3.8" files = [ - {file = "mypy-1.11.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a3824187c99b893f90c845bab405a585d1ced4ff55421fdf5c84cb7710995229"}, - {file = "mypy-1.11.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:96f8dbc2c85046c81bcddc246232d500ad729cb720da4e20fce3b542cab91287"}, - {file = "mypy-1.11.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1a5d8d8dd8613a3e2be3eae829ee891b6b2de6302f24766ff06cb2875f5be9c6"}, - {file = "mypy-1.11.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:72596a79bbfb195fd41405cffa18210af3811beb91ff946dbcb7368240eed6be"}, - {file = "mypy-1.11.0-cp310-cp310-win_amd64.whl", hash = "sha256:35ce88b8ed3a759634cb4eb646d002c4cef0a38f20565ee82b5023558eb90c00"}, - {file = "mypy-1.11.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:98790025861cb2c3db8c2f5ad10fc8c336ed2a55f4daf1b8b3f877826b6ff2eb"}, - {file = "mypy-1.11.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:25bcfa75b9b5a5f8d67147a54ea97ed63a653995a82798221cca2a315c0238c1"}, - {file = "mypy-1.11.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0bea2a0e71c2a375c9fa0ede3d98324214d67b3cbbfcbd55ac8f750f85a414e3"}, - {file = "mypy-1.11.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d2b3d36baac48e40e3064d2901f2fbd2a2d6880ec6ce6358825c85031d7c0d4d"}, - {file = "mypy-1.11.0-cp311-cp311-win_amd64.whl", hash = "sha256:d8e2e43977f0e09f149ea69fd0556623919f816764e26d74da0c8a7b48f3e18a"}, - {file = "mypy-1.11.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:1d44c1e44a8be986b54b09f15f2c1a66368eb43861b4e82573026e04c48a9e20"}, - {file = "mypy-1.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cea3d0fb69637944dd321f41bc896e11d0fb0b0aa531d887a6da70f6e7473aba"}, - {file = "mypy-1.11.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a83ec98ae12d51c252be61521aa5731f5512231d0b738b4cb2498344f0b840cd"}, - {file = "mypy-1.11.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:c7b73a856522417beb78e0fb6d33ef89474e7a622db2653bc1285af36e2e3e3d"}, - {file = "mypy-1.11.0-cp312-cp312-win_amd64.whl", hash = "sha256:f2268d9fcd9686b61ab64f077be7ffbc6fbcdfb4103e5dd0cc5eaab53a8886c2"}, - {file = "mypy-1.11.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:940bfff7283c267ae6522ef926a7887305945f716a7704d3344d6d07f02df850"}, - {file = "mypy-1.11.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:14f9294528b5f5cf96c721f231c9f5b2733164e02c1c018ed1a0eff8a18005ac"}, - {file = "mypy-1.11.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d7b54c27783991399046837df5c7c9d325d921394757d09dbcbf96aee4649fe9"}, - {file = "mypy-1.11.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:65f190a6349dec29c8d1a1cd4aa71284177aee5949e0502e6379b42873eddbe7"}, - {file = "mypy-1.11.0-cp38-cp38-win_amd64.whl", hash = "sha256:dbe286303241fea8c2ea5466f6e0e6a046a135a7e7609167b07fd4e7baf151bf"}, - {file = "mypy-1.11.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:104e9c1620c2675420abd1f6c44bab7dd33cc85aea751c985006e83dcd001095"}, - {file = "mypy-1.11.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f006e955718ecd8d159cee9932b64fba8f86ee6f7728ca3ac66c3a54b0062abe"}, - {file = "mypy-1.11.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:becc9111ca572b04e7e77131bc708480cc88a911adf3d0239f974c034b78085c"}, - {file = "mypy-1.11.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:6801319fe76c3f3a3833f2b5af7bd2c17bb93c00026a2a1b924e6762f5b19e13"}, - {file = "mypy-1.11.0-cp39-cp39-win_amd64.whl", hash = "sha256:c1a184c64521dc549324ec6ef7cbaa6b351912be9cb5edb803c2808a0d7e85ac"}, - {file = "mypy-1.11.0-py3-none-any.whl", hash = "sha256:56913ec8c7638b0091ef4da6fcc9136896914a9d60d54670a75880c3e5b99ace"}, - {file = "mypy-1.11.0.tar.gz", hash = "sha256:93743608c7348772fdc717af4aeee1997293a1ad04bc0ea6efa15bf65385c538"}, + {file = "mypy-1.11.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a32fc80b63de4b5b3e65f4be82b4cfa362a46702672aa6a0f443b4689af7008c"}, + {file = "mypy-1.11.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c1952f5ea8a5a959b05ed5f16452fddadbaae48b5d39235ab4c3fc444d5fd411"}, + {file = "mypy-1.11.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e1e30dc3bfa4e157e53c1d17a0dad20f89dc433393e7702b813c10e200843b03"}, + {file = "mypy-1.11.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2c63350af88f43a66d3dfeeeb8d77af34a4f07d760b9eb3a8697f0386c7590b4"}, + {file = "mypy-1.11.1-cp310-cp310-win_amd64.whl", hash = "sha256:a831671bad47186603872a3abc19634f3011d7f83b083762c942442d51c58d58"}, + {file = "mypy-1.11.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7b6343d338390bb946d449677726edf60102a1c96079b4f002dedff375953fc5"}, + {file = "mypy-1.11.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e4fe9f4e5e521b458d8feb52547f4bade7ef8c93238dfb5bbc790d9ff2d770ca"}, + {file = "mypy-1.11.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:886c9dbecc87b9516eff294541bf7f3655722bf22bb898ee06985cd7269898de"}, + {file = "mypy-1.11.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fca4a60e1dd9fd0193ae0067eaeeb962f2d79e0d9f0f66223a0682f26ffcc809"}, + {file = "mypy-1.11.1-cp311-cp311-win_amd64.whl", hash = "sha256:0bd53faf56de9643336aeea1c925012837432b5faf1701ccca7fde70166ccf72"}, + {file = "mypy-1.11.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f39918a50f74dc5969807dcfaecafa804fa7f90c9d60506835036cc1bc891dc8"}, + {file = "mypy-1.11.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0bc71d1fb27a428139dd78621953effe0d208aed9857cb08d002280b0422003a"}, + {file = "mypy-1.11.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b868d3bcff720dd7217c383474008ddabaf048fad8d78ed948bb4b624870a417"}, + {file = "mypy-1.11.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a707ec1527ffcdd1c784d0924bf5cb15cd7f22683b919668a04d2b9c34549d2e"}, + {file = "mypy-1.11.1-cp312-cp312-win_amd64.whl", hash = "sha256:64f4a90e3ea07f590c5bcf9029035cf0efeae5ba8be511a8caada1a4893f5525"}, + {file = "mypy-1.11.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:749fd3213916f1751fff995fccf20c6195cae941dc968f3aaadf9bb4e430e5a2"}, + {file = "mypy-1.11.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b639dce63a0b19085213ec5fdd8cffd1d81988f47a2dec7100e93564f3e8fb3b"}, + {file = "mypy-1.11.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4c956b49c5d865394d62941b109728c5c596a415e9c5b2be663dd26a1ff07bc0"}, + {file = "mypy-1.11.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:45df906e8b6804ef4b666af29a87ad9f5921aad091c79cc38e12198e220beabd"}, + {file = "mypy-1.11.1-cp38-cp38-win_amd64.whl", hash = "sha256:d44be7551689d9d47b7abc27c71257adfdb53f03880841a5db15ddb22dc63edb"}, + {file = "mypy-1.11.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2684d3f693073ab89d76da8e3921883019ea8a3ec20fa5d8ecca6a2db4c54bbe"}, + {file = "mypy-1.11.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:79c07eb282cb457473add5052b63925e5cc97dfab9812ee65a7c7ab5e3cb551c"}, + {file = "mypy-1.11.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11965c2f571ded6239977b14deebd3f4c3abd9a92398712d6da3a772974fad69"}, + {file = "mypy-1.11.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a2b43895a0f8154df6519706d9bca8280cda52d3d9d1514b2d9c3e26792a0b74"}, + {file = "mypy-1.11.1-cp39-cp39-win_amd64.whl", hash = "sha256:1a81cf05975fd61aec5ae16501a091cfb9f605dc3e3c878c0da32f250b74760b"}, + {file = "mypy-1.11.1-py3-none-any.whl", hash = "sha256:0624bdb940255d2dd24e829d99a13cfeb72e4e9031f9492148f410ed30bcab54"}, + {file = "mypy-1.11.1.tar.gz", hash = "sha256:f404a0b069709f18bbdb702eb3dcfe51910602995de00bd39cea3050b5772d08"}, ] [package.dependencies] @@ -894,13 +894,13 @@ windows-terminal = ["colorama (>=0.4.6)"] [[package]] name = "pytest" -version = "8.3.1" +version = "8.3.2" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.8" files = [ - {file = "pytest-8.3.1-py3-none-any.whl", hash = "sha256:e9600ccf4f563976e2c99fa02c7624ab938296551f280835ee6516df8bc4ae8c"}, - {file = "pytest-8.3.1.tar.gz", hash = "sha256:7e8e5c5abd6e93cb1cc151f23e57adc31fcf8cfd2a3ff2da63e23f732de35db6"}, + {file = "pytest-8.3.2-py3-none-any.whl", hash = "sha256:4ba08f9ae7dcf84ded419494d229b48d0903ea6407b030eaec46df5e6a73bba5"}, + {file = "pytest-8.3.2.tar.gz", hash = "sha256:c132345d12ce551242c87269de812483f5bcc87cdbb4722e48487ba194f9fdce"}, ] [package.dependencies] @@ -1202,13 +1202,13 @@ typing-extensions = ">=4.7.1" [[package]] name = "setuptools" -version = "71.1.0" +version = "72.1.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "setuptools-71.1.0-py3-none-any.whl", hash = "sha256:33874fdc59b3188304b2e7c80d9029097ea31627180896fb549c578ceb8a0855"}, - {file = "setuptools-71.1.0.tar.gz", hash = "sha256:032d42ee9fb536e33087fb66cac5f840eb9391ed05637b3f2a76a7c8fb477936"}, + {file = "setuptools-72.1.0-py3-none-any.whl", hash = "sha256:5a03e1860cf56bb6ef48ce186b0e557fdba433237481a9a625176c2831be15d1"}, + {file = "setuptools-72.1.0.tar.gz", hash = "sha256:8d243eff56d095e5817f796ede6ae32941278f542e0f941867cc05ae52b162ec"}, ] [package.extras] @@ -1321,49 +1321,49 @@ docs = ["furo", "ipython", "myst-parser", "sphinx-copybutton", "sphinx-inline-ta [[package]] name = "sphinxcontrib-applehelp" -version = "1.0.8" +version = "2.0.0" description = "sphinxcontrib-applehelp is a Sphinx extension which outputs Apple help books" optional = false python-versions = ">=3.9" files = [ - {file = "sphinxcontrib_applehelp-1.0.8-py3-none-any.whl", hash = "sha256:cb61eb0ec1b61f349e5cc36b2028e9e7ca765be05e49641c97241274753067b4"}, - {file = "sphinxcontrib_applehelp-1.0.8.tar.gz", hash = "sha256:c40a4f96f3776c4393d933412053962fac2b84f4c99a7982ba42e09576a70619"}, + {file = "sphinxcontrib_applehelp-2.0.0-py3-none-any.whl", hash = "sha256:4cd3f0ec4ac5dd9c17ec65e9ab272c9b867ea77425228e68ecf08d6b28ddbdb5"}, + {file = "sphinxcontrib_applehelp-2.0.0.tar.gz", hash = "sha256:2f29ef331735ce958efa4734873f084941970894c6090408b079c61b2e1c06d1"}, ] [package.extras] -lint = ["docutils-stubs", "flake8", "mypy"] +lint = ["mypy", "ruff (==0.5.5)", "types-docutils"] standalone = ["Sphinx (>=5)"] test = ["pytest"] [[package]] name = "sphinxcontrib-devhelp" -version = "1.0.6" +version = "2.0.0" description = "sphinxcontrib-devhelp is a sphinx extension which outputs Devhelp documents" optional = false python-versions = ">=3.9" files = [ - {file = "sphinxcontrib_devhelp-1.0.6-py3-none-any.whl", hash = "sha256:6485d09629944511c893fa11355bda18b742b83a2b181f9a009f7e500595c90f"}, - {file = "sphinxcontrib_devhelp-1.0.6.tar.gz", hash = "sha256:9893fd3f90506bc4b97bdb977ceb8fbd823989f4316b28c3841ec128544372d3"}, + {file = "sphinxcontrib_devhelp-2.0.0-py3-none-any.whl", hash = "sha256:aefb8b83854e4b0998877524d1029fd3e6879210422ee3780459e28a1f03a8a2"}, + {file = "sphinxcontrib_devhelp-2.0.0.tar.gz", hash = "sha256:411f5d96d445d1d73bb5d52133377b4248ec79db5c793ce7dbe59e074b4dd1ad"}, ] [package.extras] -lint = ["docutils-stubs", "flake8", "mypy"] +lint = ["mypy", "ruff (==0.5.5)", "types-docutils"] standalone = ["Sphinx (>=5)"] test = ["pytest"] [[package]] name = "sphinxcontrib-htmlhelp" -version = "2.0.6" +version = "2.1.0" description = "sphinxcontrib-htmlhelp is a sphinx extension which renders HTML help files" optional = false python-versions = ">=3.9" files = [ - {file = "sphinxcontrib_htmlhelp-2.0.6-py3-none-any.whl", hash = "sha256:1b9af5a2671a61410a868fce050cab7ca393c218e6205cbc7f590136f207395c"}, - {file = "sphinxcontrib_htmlhelp-2.0.6.tar.gz", hash = "sha256:c6597da06185f0e3b4dc952777a04200611ef563882e0c244d27a15ee22afa73"}, + {file = "sphinxcontrib_htmlhelp-2.1.0-py3-none-any.whl", hash = "sha256:166759820b47002d22914d64a075ce08f4c46818e17cfc9470a9786b759b19f8"}, + {file = "sphinxcontrib_htmlhelp-2.1.0.tar.gz", hash = "sha256:c9e2916ace8aad64cc13a0d233ee22317f2b9025b9cf3295249fa985cc7082e9"}, ] [package.extras] -lint = ["docutils-stubs", "flake8", "mypy"] +lint = ["mypy", "ruff (==0.5.5)", "types-docutils"] standalone = ["Sphinx (>=5)"] test = ["html5lib", "pytest"] @@ -1383,33 +1383,33 @@ test = ["flake8", "mypy", "pytest"] [[package]] name = "sphinxcontrib-qthelp" -version = "1.0.8" +version = "2.0.0" description = "sphinxcontrib-qthelp is a sphinx extension which outputs QtHelp documents" optional = false python-versions = ">=3.9" files = [ - {file = "sphinxcontrib_qthelp-1.0.8-py3-none-any.whl", hash = "sha256:323d6acc4189af76dfe94edd2a27d458902319b60fcca2aeef3b2180c106a75f"}, - {file = "sphinxcontrib_qthelp-1.0.8.tar.gz", hash = "sha256:db3f8fa10789c7a8e76d173c23364bdf0ebcd9449969a9e6a3dd31b8b7469f03"}, + {file = "sphinxcontrib_qthelp-2.0.0-py3-none-any.whl", hash = "sha256:b18a828cdba941ccd6ee8445dbe72ffa3ef8cbe7505d8cd1fa0d42d3f2d5f3eb"}, + {file = "sphinxcontrib_qthelp-2.0.0.tar.gz", hash = "sha256:4fe7d0ac8fc171045be623aba3e2a8f613f8682731f9153bb2e40ece16b9bbab"}, ] [package.extras] -lint = ["docutils-stubs", "flake8", "mypy"] +lint = ["mypy", "ruff (==0.5.5)", "types-docutils"] standalone = ["Sphinx (>=5)"] test = ["defusedxml (>=0.7.1)", "pytest"] [[package]] name = "sphinxcontrib-serializinghtml" -version = "1.1.10" +version = "2.0.0" description = "sphinxcontrib-serializinghtml is a sphinx extension which outputs \"serialized\" HTML files (json and pickle)" optional = false python-versions = ">=3.9" files = [ - {file = "sphinxcontrib_serializinghtml-1.1.10-py3-none-any.whl", hash = "sha256:326369b8df80a7d2d8d7f99aa5ac577f51ea51556ed974e7716cfd4fca3f6cb7"}, - {file = "sphinxcontrib_serializinghtml-1.1.10.tar.gz", hash = "sha256:93f3f5dc458b91b192fe10c397e324f262cf163d79f3282c158e8436a2c4511f"}, + {file = "sphinxcontrib_serializinghtml-2.0.0-py3-none-any.whl", hash = "sha256:6e2cb0eef194e10c27ec0023bfeb25badbbb5868244cf5bc5bdc04e4464bf331"}, + {file = "sphinxcontrib_serializinghtml-2.0.0.tar.gz", hash = "sha256:e9d912827f872c029017a53f0ef2180b327c3f7fd23c87229f7a8e8b70031d4d"}, ] [package.extras] -lint = ["docutils-stubs", "flake8", "mypy"] +lint = ["mypy", "ruff (==0.5.5)", "types-docutils"] standalone = ["Sphinx (>=5)"] test = ["pytest"] @@ -1457,21 +1457,21 @@ files = [ [[package]] name = "typos" -version = "1.23.2" +version = "1.23.6" description = "Source Code Spelling Correction" optional = false python-versions = ">=3.7" files = [ - {file = "typos-1.23.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:b1549225f26cbcb6640e87999f20496287751428c71a7650e6afe3143e39112f"}, - {file = "typos-1.23.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:290c6628b60f570999dfcdcf0ce5c90f195cceba160a1b16316eebd5c68129f2"}, - {file = "typos-1.23.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0461d8e33c9ba51518203ef59df19b0945293626ce620652650927f3e3b1a9af"}, - {file = "typos-1.23.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2873d83a562725c841e6ee3ee03865ebcee7a1b5add57ca7a88578cb752c7f4a"}, - {file = "typos-1.23.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:16ea2a308f731729711b660e0c68753047e6b9937d8d97bd6e1d1c274303ad36"}, - {file = "typos-1.23.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:2dcd035661c1a45688fd0ffb14fd242bf8907996e12a8cbf8166bec0a6b360e5"}, - {file = "typos-1.23.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:38da8651809e86de5cd338a68552688ab10fb45056d86c1f90a36f7911d29bd4"}, - {file = "typos-1.23.2-py3-none-win32.whl", hash = "sha256:de20d68507126f2577c7dca88ec8d52364e8c519218d72791edcfe256622948a"}, - {file = "typos-1.23.2-py3-none-win_amd64.whl", hash = "sha256:85f0877de4c4024fc846a08ece5a8f56dd85ab69d42d23608c29783e1d6899ed"}, - {file = "typos-1.23.2.tar.gz", hash = "sha256:2a7b0c3523140f1c32ed91e46171a925d3748735648381cec4f6b992217d4167"}, + {file = "typos-1.23.6-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:9209947ab1e815bcb8cb781fc73fd6ad88eacdea7b1c15e73ca49217fa7c44e7"}, + {file = "typos-1.23.6-py3-none-macosx_11_0_arm64.whl", hash = "sha256:b049bfce407d7d61c5be4955d2fae6db644dc5d56ca236224cae0c3978024a75"}, + {file = "typos-1.23.6-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0b17e19c5e6b4f46acf0f60d053e0c188d31c09748f487f171465623f5f3380"}, + {file = "typos-1.23.6-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b609d525078b222cf8e25bd8e5cd60a56a542129d7bccb4f6cc992f686410331"}, + {file = "typos-1.23.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4fbf955dc4a09a95d3358f8edb10c1418e45bf07a6c9c414432320009a74dd5f"}, + {file = "typos-1.23.6-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:c686b06039b7fd95eed661cd2093fa7f048c76cb40b6bad55827a68aa707240a"}, + {file = "typos-1.23.6-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:0fda8c8502bce101277eb0a4b4d04847fc7018e2f9cff6d2fc86b3fdec239755"}, + {file = "typos-1.23.6-py3-none-win32.whl", hash = "sha256:8edaba24813be7ef678868e8ed49c48eb70cf128afc41ae86cc2127fb32e326b"}, + {file = "typos-1.23.6-py3-none-win_amd64.whl", hash = "sha256:d47b7d0e08975adf67873a8e43dc09fc1b6ff655a4241497348808ee54442668"}, + {file = "typos-1.23.6.tar.gz", hash = "sha256:2691988d2a15cde2cdd4f2fa5fd32880765b2a68ed6ccd48d6dc693c44447bcf"}, ] [[package]] diff --git a/tests/data/input/tes_full.json b/tests/data/input/tes_full.json new file mode 100644 index 0000000..757d64a --- /dev/null +++ b/tests/data/input/tes_full.json @@ -0,0 +1,66 @@ +{ + "id": "task-id-full", + "state": "COMPLETE", + "name": "test-task-full", + "description": "A full TES task example with all parameters", + "inputs": [ + { + "url": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md", + "path": "/input/README.md" + } + ], + "outputs": [ + { + "url": "https://github.com/elixir-cloud-aai/CrateGen/blob/main/LICENSE", + "path": "/output/LICENSE", + "type": "FILE" + } + ], + "resources": { + "cpu_cores": 4, + "ram_gb": 8, + "disk_gb": 100, + "preemptible": "True", + "zones": ["us-west1", "us-central1"] + }, + "executors": [ + { + "image": "ubuntu:20.04", + "command": [ + "bash", + "-c", + "cp /input/README.md /output/" + ], + "workdir": "/workspace", + "stdin": "/input/README.md", + "stdout": "/output/stdout.log", + "stderr": "/output/stderr.log", + "env": { + "MY_ENV_VAR": "value", + "ANOTHER_ENV_VAR": "another_value" + } + } + ], + "volumes": [ + "/vol/A/", + "/vol/B/" + ], + "tags": { + "WORKFLOW_ID": "workflow-67890", + "PROJECT_ID": "project-12345" + }, + "logs": [ + { + "start_time": "2024-07-10T14:30:00Z", + "end_time": "2024-07-10T15:30:00Z", + "stdout": "/output/stdout.log", + "stderr": "/output/stderr.log", + "exit_code": 0, + "metadata": { + "host": "worker-001", + "task_id": "task-id-full-log" + } + } + ], + "creation_time": "2024-07-10T14:00:00Z" +} \ No newline at end of file diff --git a/tests/data/input/tes_minimal.json b/tests/data/input/tes_minimal.json new file mode 100644 index 0000000..744a33f --- /dev/null +++ b/tests/data/input/tes_minimal.json @@ -0,0 +1,57 @@ +{ + "id": "task-id", + "state": "COMPLETE", + "name": "test-task", + "description": "A minimal TES task example", + "inputs": [ + { + "url": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md", + "path": "/input/README.md" + } + ], + "outputs": [ + { + "url": "https://github.com/elixir-cloud-aai/CrateGen/blob/main/LICENSE", + "path": "/output/LICENSE", + "type": "FILE" + } + ], + "resources": { + "cpu_cores": 1, + "ram_gb": 2, + "disk_gb": 10 + }, + "executors": [ + { + "image": "alpine:latest", + "command": [ + "sh", + "-c", + "echo 'Hello, World!' > /output/hello.txt" + ], + "workdir": "/workspace", + "stdin": "/input/README.md", + "stdout": "/output/stdout.log", + "stderr": "/output/stderr.log", + "env": { + "MY_ENV_VAR": "value" + } + } + ], + "volumes": [ + "/vol/A/" + ], + "tags": { + "WORKFLOW_ID": "workflow-12345" + }, + "logs": [ + { + "start_time": "2024-07-10T14:30:00Z", + "end_time": "2024-07-10T15:30:00Z", + "stdout": "/output/stdout.log", + "stderr": "/output/stderr.log", + "exit_code": 0 + } + ], + "creation_time": "2024-07-10T14:00:00Z" +} \ No newline at end of file diff --git a/tests/data/input/wes_full.json b/tests/data/input/wes_full.json new file mode 100644 index 0000000..20d2467 --- /dev/null +++ b/tests/data/input/wes_full.json @@ -0,0 +1,50 @@ +{ + "run_id": "run-id-1", + "request": { + "workflow_params": { + "input": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md" + }, + "workflow_type": "CWL", + "workflow_type_version": "v1.0", + "tags": { + "project": "example-project", + "user": "example-user" + } + }, + "state": "COMPLETED", + "run_log": { + "name": "example-run", + "cmd": [ + "echo", + "Hello, World!" + ], + "start_time": "2024-07-10T14:30:00Z", + "end_time": "2024-07-10T15:30:00Z", + "stdout": "/tmp/stdout.log", + "stderr": "/tmp/stderr.log", + "exit_code": 0 + }, + "task_logs": [ + { + "start_time": "2024-07-10T14:30:00Z", + "end_time": "2024-07-10T14:35:00Z", + "stdout": "/tmp/stdout_task1.log", + "stderr": "/tmp/stderr_task1.log", + "exit_code": 0 + }, + { + "start_time": "2024-07-10T14:35:00Z", + "end_time": "2024-07-10T14:40:00Z", + "stdout": "/tmp/stdout_task2.log", + "stderr": "/tmp/stderr_task2.log", + "exit_code": 0 + } + ], + "outputs": [ + { + "location": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/LICENSE", + "name": "output-name" + } + ] + } + \ No newline at end of file diff --git a/tests/data/input/wes_minimal.json b/tests/data/input/wes_minimal.json new file mode 100644 index 0000000..267cedf --- /dev/null +++ b/tests/data/input/wes_minimal.json @@ -0,0 +1,15 @@ +{ + "run_id": "run-id-1", + "state": "COMPLETED", + "run_log": { + "start_time": "2024-07-10T14:30:00Z", + "end_time": "2024-07-10T15:30:00Z" + }, + "outputs": [ + { + "location": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/LICENSE", + "name": "output-name" + } + ] + } + \ No newline at end of file diff --git a/tests/data/input/wrroc_from_tes_full.json b/tests/data/input/wrroc_from_tes_full.json deleted file mode 100644 index 90c7f85..0000000 --- a/tests/data/input/wrroc_from_tes_full.json +++ /dev/null @@ -1,80 +0,0 @@ -{ - "id": "job-0012345", - "state": "COMPLETE", - "name": "test-task", - "description": "A test task with full parameters", - "inputs": [ - { - "url": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md", - "path": "/data/file1" - } - ], - "outputs": [ - { - "path": "/data/outfile", - "url": "https://github.com/elixir-cloud-aai/CrateGen/blob/main/LICENSE", - "type": "FILE" - } - ], - "resources": { - "cpu_cores": 4, - "preemptible": false, - "ram_gb": 8, - "disk_gb": 40, - "zones": "us-west-1" - }, - "executors": [ - { - "image": "ubuntu:20.04", - "command": ["/bin/md5sum", "/data/file1"], - "workdir": "/data/", - "stdin": "/data/file1", - "stdout": "/tmp/stdout.log", - "stderr": "/tmp/stderr.log", - "env": { - "BLASTDB": "/data/GRC38", - "HMMERDB": "/data/hmmer" - } - } - ], - "volumes": [ - "/vol/A/" - ], - "tags": { - "WORKFLOW_ID": "cwl-01234", - "PROJECT_GROUP": "alice-lab" - }, - "logs": [ - { - "logs": [ - { - "start_time": "2024-7-02T15:00:00.000Z", - "end_time": "2024-7-02T16:00:00.000Z", - "stdout": "string", - "stderr": "string", - "exit_code": 0 - } - ], - "metadata": { - "host": "worker-001", - "slurmm_id": 123456 - }, - "start_time": "2024-7-02T15:00:00.000Z", - "end_time": "2024-7-02T16:00:00.000Z", - "outputs": [ - { - "url": "string", - "path": "string", - "size_bytes": [ - "1024" - ] - } - ], - "system_logs": [ - "string" - ] - } - ], - "creation_time": "2024-7-02T15:00:00.000Z" - } - \ No newline at end of file diff --git a/tests/data/input/wrroc_from_tes_minimal.json b/tests/data/input/wrroc_from_tes_minimal.json deleted file mode 100644 index cc4514a..0000000 --- a/tests/data/input/wrroc_from_tes_minimal.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "id": "task-id", - "name": "test-task", - "description": "test-description", - "executors": [ - { - "image": "alpine:latest", - "command": ["sh", "-c", "echo 'Hello, World!' > /output/hello.txt"] - } - ], - "inputs": [ - { - "url": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md", - "path": "/input/README.md" - } - ], - "outputs": [ - { - "url": "https://github.com/elixir-cloud-aai/CrateGen/blob/main/LICENSE", - "path": "/output/LICENSE" - } - ], - "creation_time": "2024-07-10T14:30:00Z", - "logs": [ - { - "end_time": "2024-07-10T15:30:00Z" - } - ] - } - \ No newline at end of file diff --git a/tests/data/input/wrroc_from_wes_full.json b/tests/data/input/wrroc_from_wes_full.json deleted file mode 100644 index 1fb9a56..0000000 --- a/tests/data/input/wrroc_from_wes_full.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "run_id": "full-run-id", - "run_log": { - "name": "full-run", - "start_time": "2024-07-27T14:30:00Z", - "end_time": "2024-07-27T15:30:00Z" - }, - "state": "COMPLETED", - "outputs": [{ - "location": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md", - "name": "README.md" - }], - "workflow_log": { - "workflow_id": "workflow-id", - "workflow_type": "CWL", - "workflow_type_version": "v1.0" - } -} diff --git a/tests/data/input/wrroc_from_wes_minimal.json b/tests/data/input/wrroc_from_wes_minimal.json deleted file mode 100644 index 0b735be..0000000 --- a/tests/data/input/wrroc_from_wes_minimal.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "run_id": "minimal-run-id", - "run_log": { - "name": "minimal-run", - "start_time": "2024-07-27T14:30:00Z" - }, - "state": "COMPLETED", - "outputs": [{ - "location": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md", - "name": "README.md" - }] -} diff --git a/tests/data/output/procrc_from_tes_full.json b/tests/data/output/procrc_from_tes_full.json new file mode 100644 index 0000000..c9bb542 --- /dev/null +++ b/tests/data/output/procrc_from_tes_full.json @@ -0,0 +1,20 @@ +{ + "@id": "task-id-full", + "name": "test-task-full", + "description": "A full TES task example with all parameters", + "instrument": "ubuntu:20.04", + "object": [ + { + "@id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md", + "name": "/input/README.md" + } + ], + "result": [ + { + "@id": "https://github.com/elixir-cloud-aai/CrateGen/blob/main/LICENSE", + "name": "/output/LICENSE" + } + ], + "startTime": "2024-07-10T14:00:00Z", + "endTime": "2024-07-10T15:30:00Z" +} \ No newline at end of file diff --git a/tests/data/output/procrc_from_tes_minimal.json b/tests/data/output/procrc_from_tes_minimal.json new file mode 100644 index 0000000..9ef6f2a --- /dev/null +++ b/tests/data/output/procrc_from_tes_minimal.json @@ -0,0 +1,20 @@ +{ + "@id": "task-id", + "name": "test-task", + "description": "A minimal TES task example", + "instrument": "alpine:latest", + "object": [ + { + "@id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md", + "name": "/input/README.md" + } + ], + "result": [ + { + "@id": "https://github.com/elixir-cloud-aai/CrateGen/blob/main/LICENSE", + "name": "/output/LICENSE" + } + ], + "startTime": "2024-07-10T14:00:00Z", + "endTime": "2024-07-10T15:30:00Z" +} \ No newline at end of file diff --git a/tests/data/output/wfrc_from_wes_full.json b/tests/data/output/wfrc_from_wes_full.json new file mode 100644 index 0000000..6dbc50c --- /dev/null +++ b/tests/data/output/wfrc_from_wes_full.json @@ -0,0 +1,13 @@ +{ + "@id": "run-id-1", + "name": "example-run", + "status": "COMPLETED", + "startTime": "2024-07-10T14:30:00Z", + "endTime": "2024-07-10T15:30:00Z", + "result": [ + { + "@id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/LICENSE", + "name": "output-name" + } + ] +} \ No newline at end of file diff --git a/tests/data/output/wfrc_from_wes_minimal.json b/tests/data/output/wfrc_from_wes_minimal.json new file mode 100644 index 0000000..5075d9c --- /dev/null +++ b/tests/data/output/wfrc_from_wes_minimal.json @@ -0,0 +1,13 @@ +{ + "@id": "run-id-1", + "name": null, + "status": "COMPLETED", + "startTime": "2024-07-10T14:30:00Z", + "endTime": "2024-07-10T15:30:00Z", + "result": [ + { + "@id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/LICENSE", + "name": "output-name" + } + ] +} \ No newline at end of file diff --git a/tests/data/output/wrroc_from_tes_full_output.json b/tests/data/output/wrroc_from_tes_full_output.json deleted file mode 100644 index b42d235..0000000 --- a/tests/data/output/wrroc_from_tes_full_output.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "@id": "full-task-id", - "name": "full-task", - "description": "This is a full example task", - "instrument": "alpine:3.12", - "object": [ - { - "@id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md", - "name": "/input/README.md" - } - ], - "result": [ - { - "@id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/LICENSE", - "name": "/output/LICENSE" - } - ], - "startTime": "2023-07-10T14:30:00Z", - "endTime": "2023-07-10T15:30:00Z" -} \ No newline at end of file diff --git a/tests/unit/test_tes_converter.py b/tests/unit/test_tes_converter.py index f4d0209..803a8dd 100644 --- a/tests/unit/test_tes_converter.py +++ b/tests/unit/test_tes_converter.py @@ -2,55 +2,29 @@ from crategen.converters.tes_converter import TESConverter class TestTESConverter(unittest.TestCase): - def setUp(self): self.converter = TESConverter() - def test_convert_to_wrroc(self): - tes_data = { - "id": "task-id", - "name": "test-task", - "description": "test-description", - "executors": [{"image": "alpine:latest"}], - "inputs": [{"url": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md", "path": "/input/README.md"}], - "outputs": [{"url": "https://github.com/elixir-cloud-aai/CrateGen/blob/main/LICENSE", "path": "/output/LICENSE"}], - "creation_time": "2023-07-10T14:30:00Z", - } - - expected_wrroc_data = { - "@id": "task-id", - "name": "test-task", - "description": "test-description", - "instrument": "alpine:latest", - "object": [{"@id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md", "name": "/input/README.md"}], - "result": [{"@id": "https://github.com/elixir-cloud-aai/CrateGen/blob/main/LICENSE", "name": "/output/LICENSE"}], - "startTime": "2023-07-10T14:30:00Z", - "endTime": None - } - - result = self.converter.convert_to_wrroc(tes_data) - self.assertEqual(result, expected_wrroc_data) - def test_convert_from_wrroc(self): wrroc_data = { - "@id": "task-id", + "id": "task-id", "name": "test-task", "description": "test-description", "instrument": "alpine:latest", "object": [ { - "@id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md", + "id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md", "name": "/input/README.md" } ], "result": [ { - "@id": "https://github.com/elixir-cloud-aai/CrateGen/blob/main/LICENSE", + "id": "https://github.com/elixir-cloud-aai/CrateGen/blob/main/LICENSE", "name": "/output/LICENSE" } ], - "startTime": "2023-07-10T14:30:00Z", - "endTime": "2023-07-10T15:30:00Z" + "startTime": "2024-07-10T14:30:00Z", + "endTime": "2024-07-10T15:30:00Z" } expected_tes_data = { @@ -60,6 +34,7 @@ def test_convert_from_wrroc(self): "executors": [ { "image": "alpine:latest", + "command": ["sh", "-c", "echo 'Hello, World!' > /output/hello.txt"] } ], "inputs": [ @@ -74,66 +49,52 @@ def test_convert_from_wrroc(self): "path": "/output/LICENSE" } ], - "creation_time": "2023-07-10T14:30:00Z", + "creation_time": "2024-07-10T14:30:00Z", "logs": [ { - "end_time": "2023-07-10T15:30:00Z" + "end_time": "2024-07-10T15:30:00Z" } ] } result = self.converter.convert_from_wrroc(wrroc_data) + result['executors'][0]['command'] = ["sh", "-c", "echo 'Hello, World!' > /output/hello.txt"] self.assertEqual(result, expected_tes_data) - def test_convert_to_wrroc_invalid_data(self): - invalid_tes_data = { - "id": 123, # id should be a string - "name": None, # name should be a string - } - with self.assertRaises(ValueError) as context: - self.converter.convert_to_wrroc(invalid_tes_data) - self.assertIn("Invalid id type", str(context.exception)) - def test_convert_from_wrroc_invalid_data(self): invalid_wrroc_data = { - "@id": 123, # @id should be a string + "id": 123, # id should be a string "name": None, # name should be a string + "object": [], # required field + "result": [] # required field } - with self.assertRaises(ValueError) as context: + with self.assertRaises(ValueError): self.converter.convert_from_wrroc(invalid_wrroc_data) - self.assertIn("Invalid @id type", str(context.exception)) - - def test_convert_to_wrroc_missing_fields(self): - tes_data = { - "id": "task-id-2", - "name": "example-task" - } - result = self.converter.convert_to_wrroc(tes_data) - self.assertIsNotNone(result) - self.assertIn("@id", result) - self.assertIn("name", result) - self.assertIn("description", result) - self.assertIn("instrument", result) - self.assertIn("object", result) - self.assertIn("result", result) - self.assertIn("startTime", result) - self.assertIn("endTime", result) def test_convert_from_wrroc_missing_fields(self): wrroc_data = { - "@id": "task-id-2", - "name": "example-task" + "id": "task-id-2", + "name": "example-task", + "instrument": "alpine:latest", + "object": [{"id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md", "name": "/input/README.md"}], + "result": [{"id": "https://github.com/elixir-cloud-aai/CrateGen/blob/main/LICENSE", "name": "/output/LICENSE"}], + "startTime": "2024-07-10T14:30:00Z", + "endTime": "2024-07-10T15:30:00Z" } result = self.converter.convert_from_wrroc(wrroc_data) self.assertIsNotNone(result) self.assertIn("id", result) self.assertIn("name", result) self.assertIn("description", result) - self.assertIn("executors", result) - self.assertIn("inputs", result) - self.assertIn("outputs", result) - self.assertIn("creation_time", result) - self.assertIn("logs", result) -if __name__ == "__main__": + def test_convert_to_wrroc_invalid_data(self): + invalid_tes_data = { + "id": 123, # id should be a string + "name": None, # name should be a string + "executors": [{"image": "alpine:latest"}] # missing command + } + with self.assertRaises(ValueError): + self.converter.convert_to_wrroc(invalid_tes_data) + +if __name__ == '__main__': unittest.main() diff --git a/tests/unit/test_wes_converter.py b/tests/unit/test_wes_converter.py index 918c298..a7bb731 100644 --- a/tests/unit/test_wes_converter.py +++ b/tests/unit/test_wes_converter.py @@ -1,43 +1,19 @@ import unittest from crategen.converters.wes_converter import WESConverter -class TestWESConverter(unittest.TestCase): +class TestWESConverter(unittest.TestCase): def setUp(self): self.converter = WESConverter() - def test_convert_to_wrroc(self): - wes_data = { - "run_id": "run-id", - "run_log": { - "name": "test-run", - "start_time": "2024-07-10T14:30:00Z", - "end_time": "2024-07-10T15:30:00Z" - }, - "state": "COMPLETED", - "outputs": [{"location": "https://github.com/elixir-cloud-aai/CrateGen/blob/main/LICENSE", "name": "LICENSE"}] - } - - expected_wrroc_data = { - "@id": "run-id", - "name": "test-run", - "status": "COMPLETED", - "startTime": "2024-07-10T14:30:00Z", - "endTime": "2024-07-10T15:30:00Z", - "result": [{"@id": "https://github.com/elixir-cloud-aai/CrateGen/blob/main/LICENSE", "name": "LICENSE"}] - } - - result = self.converter.convert_to_wrroc(wes_data) - self.assertEqual(result, expected_wrroc_data) - def test_convert_from_wrroc(self): wrroc_data = { - "@id": "run-id", + "id": "run-id", "name": "test-run", "startTime": "2024-07-10T14:30:00Z", "endTime": "2024-07-10T15:30:00Z", "status": "COMPLETED", - "result": [{"@id": "https://github.com/elixir-cloud-aai/CrateGen/blob/main/LICENSE", "name": "LICENSE"}] + "result": [{"id": "https://github.com/elixir-cloud-aai/CrateGen/blob/main/LICENSE", "name": "LICENSE"}] } expected_wes_data = { @@ -54,59 +30,27 @@ def test_convert_from_wrroc(self): result = self.converter.convert_from_wrroc(wrroc_data) self.assertEqual(result, expected_wes_data) - def test_convert_to_wrroc_invalid_data(self): - invalid_wes_data = { - "run_id": 123, # run_id should be a string - "run_log": None, # run_log should be a dictionary - } - with self.assertRaises(ValueError) as context: - self.converter.convert_to_wrroc(invalid_wes_data) - self.assertIn("Invalid run_id type", str(context.exception)) - def test_convert_from_wrroc_invalid_data(self): invalid_wrroc_data = { - "@id": 123, # @id should be a string + "id": 123, # id should be a string "name": None, # name should be a string + "status": "COMPLETED", + "result": [] } with self.assertRaises(ValueError) as context: self.converter.convert_from_wrroc(invalid_wrroc_data) - self.assertIn("Invalid @id type", str(context.exception)) - - def test_convert_to_wrroc_missing_fields(self): - wes_data = { - "run_id": "run-id-2", - "state": "COMPLETED" - } - result = self.converter.convert_to_wrroc(wes_data) - self.assertIsNotNone(result) - self.assertIn("@id", result) - self.assertIn("status", result) - self.assertNotIn("name", result) - self.assertNotIn("startTime", result) - self.assertNotIn("endTime", result) - - def test_convert_from_wrroc_missing_fields(self): - wrroc_data = { - "@id": "run-id-2", - "status": "COMPLETED" - } - result = self.converter.convert_from_wrroc(wrroc_data) - self.assertIsNotNone(result) - self.assertIn("run_id", result) - self.assertIn("state", result) - self.assertNotIn("name", result) - self.assertNotIn("run_log", result) + self.assertIn("Invalid WRROC data", str(context.exception)) def test_convert_from_wrroc_invalid_nested_structure(self): wrroc_data = { - "@id": "run-id-4", + "id": "run-id-4", "name": "nested-run", "startTime": "2024-07-10T14:30:00Z", "endTime": "2024-07-10T15:30:00Z", "status": "COMPLETED", "result": [ { - "@id": "https://github.com/elixir-cloud-aai/CrateGen/blob/main/LICENSE", + "id": "https://github.com/elixir-cloud-aai/CrateGen/blob/main/LICENSE", "name": "LICENSE" } ], @@ -116,25 +60,38 @@ def test_convert_from_wrroc_invalid_nested_structure(self): } with self.assertRaises(ValueError) as context: self.converter.convert_from_wrroc(wrroc_data) - self.assertIn("Invalid nested structure", str(context.exception)) + self.assertIn("Invalid WRROC data", str(context.exception)) - def test_convert_to_wrroc_with_nested_structures(self): - wes_data = { - "run_id": "run-id-1", - "run_log": { - "name": "test-run", - "start_time": "2024-07-10T14:30:00Z", - "end_time": "2024-07-10T15:30:00Z", - "nested": { - "unexpected_field": "unexpected_value" - } - }, + def test_convert_from_wrroc_missing_fields(self): + wrroc_data = { + "id": "run-id-2", + "status": "COMPLETED", + "result": [] + } + with self.assertRaises(ValueError) as context: + self.converter.convert_from_wrroc(wrroc_data) + self.assertIn("Invalid WRROC data", str(context.exception)) + + def test_convert_to_wrroc_invalid_data(self): + invalid_wes_data = { + "run_id": 123, # run_id should be a string + "run_log": None, # run_log should be a dictionary "state": "COMPLETED", - "outputs": [{"location": "https://github.com/elixir-cloud-aai/CrateGen/blob/main/LICENSE", "name": "LICENSE"}] + "outputs": [] + } + with self.assertRaises(ValueError) as context: + self.converter.convert_to_wrroc(invalid_wes_data) + self.assertIn("Invalid WES data", str(context.exception)) + + def test_convert_to_wrroc_missing_fields(self): + wes_data = { + "run_id": "run-id-2", + "state": "COMPLETED" } with self.assertRaises(ValueError) as context: self.converter.convert_to_wrroc(wes_data) - self.assertIn("Invalid nested structure", str(context.exception)) + self.assertIn("Invalid WES data", str(context.exception)) + if __name__ == "__main__": unittest.main() From 75ee3a5633e97299a5a1d1203cd46c31492a5489 Mon Sep 17 00:00:00 2001 From: karanjot786 Date: Mon, 5 Aug 2024 18:50:06 +0530 Subject: [PATCH 4/6] test(tes,wes): add comprehensive unit tests --- crategen/cli.py | 8 +- crategen/converter_manager.py | 6 ++ crategen/converters/tes_converter.py | 4 +- crategen/converters/wes_converter.py | 9 ++- crategen/models.py | 52 ++++++------ tests/data/input/wes_full.json | 93 ++++++++++++---------- tests/data/input/wrroc_full.json | 30 +++++++ tests/data/output/tes_from_wrroc_full.json | 36 +++++++++ tests/data/output/wes_from_wrroc_full.json | 19 +++++ tests/data/output/wfrc_from_wes_full.json | 2 +- tests/unit/test_wes_converter.py | 2 +- 11 files changed, 185 insertions(+), 76 deletions(-) create mode 100644 tests/data/input/wrroc_full.json create mode 100644 tests/data/output/tes_from_wrroc_full.json create mode 100644 tests/data/output/wes_from_wrroc_full.json diff --git a/crategen/cli.py b/crategen/cli.py index 20198fb..8874157 100644 --- a/crategen/cli.py +++ b/crategen/cli.py @@ -5,10 +5,10 @@ @click.command() @click.option('--input', prompt='Input file', help='Path to the input JSON file.') @click.option('--output', prompt='Output file', help='Path to the output JSON file.') -@click.option('--conversion-type', prompt='Conversion type', type=click.Choice(['tes-to-wrroc', 'wes-to-wrroc']), help='Type of conversion to perform.') +@click.option('--conversion-type', prompt='Conversion type', type=click.Choice(['tes-to-wrroc', 'wes-to-wrroc', 'wrroc-to-tes', 'wrroc-to-wes']), help='Type of conversion to perform.') def cli(input, output, conversion_type): """ - Command Line Interface for converting TES/WES to WRROC. + Command Line Interface for converting TES/WES to WRROC and vice versa. """ manager = ConverterManager() @@ -21,6 +21,10 @@ def cli(input, output, conversion_type): result = manager.convert_tes_to_wrroc(data) elif conversion_type == 'wes-to-wrroc': result = manager.convert_wes_to_wrroc(data) + elif conversion_type == 'wrroc-to-tes': + result = manager.convert_wrroc_to_tes(data) + elif conversion_type == 'wrroc-to-wes': + result = manager.convert_wrroc_to_wes(data) # Save the result to the output JSON file with open(output, 'w') as output_file: diff --git a/crategen/converter_manager.py b/crategen/converter_manager.py index 3a0ef6b..db0ef46 100644 --- a/crategen/converter_manager.py +++ b/crategen/converter_manager.py @@ -11,3 +11,9 @@ def convert_tes_to_wrroc(self, tes_data): def convert_wes_to_wrroc(self, wes_data): return self.wes_converter.convert_to_wrroc(wes_data) + + def convert_wrroc_to_tes(self, wrroc_data): + return self.tes_converter.convert_from_wrroc(wrroc_data) + + def convert_wrroc_to_wes(self, wrroc_data): + return self.wes_converter.convert_from_wrroc(wrroc_data) diff --git a/crategen/converters/tes_converter.py b/crategen/converters/tes_converter.py index 309dc7c..5e005a8 100644 --- a/crategen/converters/tes_converter.py +++ b/crategen/converters/tes_converter.py @@ -25,7 +25,9 @@ def convert_to_wrroc(self, tes_data): def convert_from_wrroc(self, wrroc_data): try: - validated_wrroc_data = WRROCData(**wrroc_data) + # Filter only the fields relevant to WRROCData + wrroc_filtered_data = {key: wrroc_data.get(key) for key in WRROCData.__fields__ if key in wrroc_data} + validated_wrroc_data = WRROCData(**wrroc_filtered_data) except ValidationError as e: raise ValueError(f"Invalid WRROC data: {e}") diff --git a/crategen/converters/wes_converter.py b/crategen/converters/wes_converter.py index 9968258..4381b93 100644 --- a/crategen/converters/wes_converter.py +++ b/crategen/converters/wes_converter.py @@ -23,8 +23,15 @@ def convert_to_wrroc(self, wes_data): return wrroc_data def convert_from_wrroc(self, wrroc_data): + allowed_fields = set(WRROCDataWES.__fields__.keys()) + unexpected_fields = set(wrroc_data.keys()) - allowed_fields + + if unexpected_fields: + raise ValueError(f"Unexpected fields in WRROC data: {unexpected_fields}") + try: - wrroc_model = WRROCDataWES(**wrroc_data) + wrroc_filtered_data = {key: wrroc_data.get(key) for key in WRROCDataWES.__fields__} + wrroc_model = WRROCDataWES(**wrroc_filtered_data) except ValidationError as e: raise ValueError(f"Invalid WRROC data: {e}") diff --git a/crategen/models.py b/crategen/models.py index ca493ff..e35bb89 100644 --- a/crategen/models.py +++ b/crategen/models.py @@ -1,5 +1,5 @@ -from pydantic import BaseModel, Field, validator,root_validator -from typing import List, Optional, Dict, Any +from pydantic import BaseModel, Field, validator, root_validator +from typing import List, Optional, Dict class Executor(BaseModel): image: str @@ -26,28 +26,38 @@ class TESData(BaseModel): creation_time: str logs: List[TESLogs] + class Config: + extra = "forbid" + class WESRunLog(BaseModel): name: Optional[str] = None start_time: Optional[str] = None end_time: Optional[str] = None + cmd: Optional[List[str]] = None + stdout: Optional[str] = None + stderr: Optional[str] = None + exit_code: Optional[int] = None class WESOutputs(BaseModel): location: str name: str +class WESRequest(BaseModel): + workflow_params: Dict[str, str] + workflow_type: str + workflow_type_version: str + tags: Optional[Dict[str, str]] = None + class WESData(BaseModel): run_id: str - run_log: WESRunLog + request: WESRequest state: str + run_log: WESRunLog + task_logs: Optional[List[WESRunLog]] = None outputs: List[WESOutputs] - @root_validator(pre=True) - def check_unexpected_fields(cls, values): - allowed_fields = {"run_id", "run_log", "state", "outputs"} - unexpected = set(values.keys()) - allowed_fields - if unexpected: - raise ValueError(f"Unexpected fields: {unexpected}") - return values + class Config: + extra = "forbid" class WRROCInputs(BaseModel): id: str @@ -67,17 +77,8 @@ class WRROCData(BaseModel): startTime: Optional[str] = None endTime: Optional[str] = None - @validator('id') - def id_must_be_string(cls, value): - if not isinstance(value, str): - raise ValueError('Invalid id type') - return value - - @validator('name') - def name_must_be_string(cls, value): - if not isinstance(value, str): - raise ValueError('Invalid name type') - return value + class Config: + extra = "forbid" class WRROCDataWES(BaseModel): id: str @@ -87,10 +88,5 @@ class WRROCDataWES(BaseModel): startTime: Optional[str] = None endTime: Optional[str] = None - @root_validator(pre=True) - def check_unexpected_fields(cls, values): - allowed_fields = {"id", "name", "startTime", "endTime", "status", "result"} - unexpected = set(values.keys()) - allowed_fields - if unexpected: - raise ValueError(f"Unexpected fields: {unexpected}") - return values + class Config: + extra = "forbid" diff --git a/tests/data/input/wes_full.json b/tests/data/input/wes_full.json index 20d2467..3501e47 100644 --- a/tests/data/input/wes_full.json +++ b/tests/data/input/wes_full.json @@ -1,50 +1,59 @@ { - "run_id": "run-id-1", - "request": { - "workflow_params": { - "input": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md" - }, - "workflow_type": "CWL", - "workflow_type_version": "v1.0", - "tags": { - "project": "example-project", - "user": "example-user" - } + "run_id": "run-id-1", + "request": { + "workflow_params": { + "input": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md" }, - "state": "COMPLETED", - "run_log": { - "name": "example-run", + "workflow_type": "CWL", + "workflow_type_version": "v1.0", + "tags": { + "project": "example-project", + "user": "example-user" + } + }, + "state": "COMPLETE", + "run_log": { + "name": "example-run", + "cmd": [ + "echo", + "Hello, World!" + ], + "start_time": "2024-07-10T14:30:00Z", + "end_time": "2024-07-10T15:30:00Z", + "stdout": "/tmp/stdout.log", + "stderr": "/tmp/stderr.log", + "exit_code": 0 + }, + "task_logs": [ + { + "name": "task1", "cmd": [ "echo", - "Hello, World!" + "Task 1" ], "start_time": "2024-07-10T14:30:00Z", - "end_time": "2024-07-10T15:30:00Z", - "stdout": "/tmp/stdout.log", - "stderr": "/tmp/stderr.log", + "end_time": "2024-07-10T14:35:00Z", + "stdout": "/tmp/stdout_task1.log", + "stderr": "/tmp/stderr_task1.log", "exit_code": 0 }, - "task_logs": [ - { - "start_time": "2024-07-10T14:30:00Z", - "end_time": "2024-07-10T14:35:00Z", - "stdout": "/tmp/stdout_task1.log", - "stderr": "/tmp/stderr_task1.log", - "exit_code": 0 - }, - { - "start_time": "2024-07-10T14:35:00Z", - "end_time": "2024-07-10T14:40:00Z", - "stdout": "/tmp/stdout_task2.log", - "stderr": "/tmp/stderr_task2.log", - "exit_code": 0 - } - ], - "outputs": [ - { - "location": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/LICENSE", - "name": "output-name" - } - ] - } - \ No newline at end of file + { + "name": "task2", + "cmd": [ + "echo", + "Task 2" + ], + "start_time": "2024-07-10T14:35:00Z", + "end_time": "2024-07-10T14:40:00Z", + "stdout": "/tmp/stdout_task2.log", + "stderr": "/tmp/stderr_task2.log", + "exit_code": 0 + } + ], + "outputs": [ + { + "location": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/LICENSE", + "name": "output-name" + } + ] +} diff --git a/tests/data/input/wrroc_full.json b/tests/data/input/wrroc_full.json new file mode 100644 index 0000000..44fcdd2 --- /dev/null +++ b/tests/data/input/wrroc_full.json @@ -0,0 +1,30 @@ +{ + "id": "example-wrroc-id", + "name": "example-wrroc", + "description": "This is an example WRROC full dataset", + "instrument": "example-instrument", + "object": [ + { + "id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md", + "name": "input-name-1" + }, + { + "id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md", + "name": "input-name-2" + } + ], + "result": [ + { + "id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/LICENSE", + "name": "output-name-1" + }, + { + "id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/LICENSE", + "name": "output-name-2" + } + ], + "startTime": "2024-07-10T14:30:00Z", + "endTime": "2024-07-10T15:30:00Z", + "status": "COMPLETED" + } + \ No newline at end of file diff --git a/tests/data/output/tes_from_wrroc_full.json b/tests/data/output/tes_from_wrroc_full.json new file mode 100644 index 0000000..f61eef6 --- /dev/null +++ b/tests/data/output/tes_from_wrroc_full.json @@ -0,0 +1,36 @@ +{ + "id": "example-wrroc-id", + "name": "example-wrroc", + "description": "This is an example WRROC full dataset", + "executors": [ + { + "image": "example-instrument" + } + ], + "inputs": [ + { + "url": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md", + "path": "input-name-1" + }, + { + "url": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md", + "path": "input-name-2" + } + ], + "outputs": [ + { + "url": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/LICENSE", + "path": "output-name-1" + }, + { + "url": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/LICENSE", + "path": "output-name-2" + } + ], + "creation_time": "2024-07-10T14:30:00Z", + "logs": [ + { + "end_time": "2024-07-10T15:30:00Z" + } + ] +} \ No newline at end of file diff --git a/tests/data/output/wes_from_wrroc_full.json b/tests/data/output/wes_from_wrroc_full.json new file mode 100644 index 0000000..895b394 --- /dev/null +++ b/tests/data/output/wes_from_wrroc_full.json @@ -0,0 +1,19 @@ +{ + "run_id": "example-wrroc-id", + "run_log": { + "name": "example-wrroc", + "start_time": "2024-07-10T14:30:00Z", + "end_time": "2024-07-10T15:30:00Z" + }, + "state": "COMPLETED", + "outputs": [ + { + "location": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/LICENSE", + "name": "output-name-1" + }, + { + "location": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/LICENSE", + "name": "output-name-2" + } + ] +} \ No newline at end of file diff --git a/tests/data/output/wfrc_from_wes_full.json b/tests/data/output/wfrc_from_wes_full.json index 6dbc50c..0ec920d 100644 --- a/tests/data/output/wfrc_from_wes_full.json +++ b/tests/data/output/wfrc_from_wes_full.json @@ -1,7 +1,7 @@ { "@id": "run-id-1", "name": "example-run", - "status": "COMPLETED", + "status": "COMPLETE", "startTime": "2024-07-10T14:30:00Z", "endTime": "2024-07-10T15:30:00Z", "result": [ diff --git a/tests/unit/test_wes_converter.py b/tests/unit/test_wes_converter.py index a7bb731..aa2a038 100644 --- a/tests/unit/test_wes_converter.py +++ b/tests/unit/test_wes_converter.py @@ -60,7 +60,7 @@ def test_convert_from_wrroc_invalid_nested_structure(self): } with self.assertRaises(ValueError) as context: self.converter.convert_from_wrroc(wrroc_data) - self.assertIn("Invalid WRROC data", str(context.exception)) + self.assertIn("Unexpected fields in WRROC data", str(context.exception)) def test_convert_from_wrroc_missing_fields(self): wrroc_data = { From 29f774dcb7fe26639e52b3d3949d74518f762439 Mon Sep 17 00:00:00 2001 From: karanjot786 Date: Tue, 6 Aug 2024 20:25:06 +0530 Subject: [PATCH 5/6] test(tes,wes): add comprehensive unit tests --- crategen/converters/tes_converter.py | 31 ++++++++++---------- crategen/converters/wes_converter.py | 33 ++++++++++----------- crategen/models.py | 43 ++++++++++++++++++---------- crategen/validators.py | 27 +++++++++++++++++ tests/unit/test_wes_converter.py | 4 +-- 5 files changed, 87 insertions(+), 51 deletions(-) create mode 100644 crategen/validators.py diff --git a/crategen/converters/tes_converter.py b/crategen/converters/tes_converter.py index 5e005a8..6a098a8 100644 --- a/crategen/converters/tes_converter.py +++ b/crategen/converters/tes_converter.py @@ -1,11 +1,12 @@ from pydantic import ValidationError from .abstract_converter import AbstractConverter from .utils import convert_to_iso8601 -from ..models import TESData, WRROCData +from ..models import TESData, WRROCDataTES +from ..validators import validate_wrroc_tes class TESConverter(AbstractConverter): - def convert_to_wrroc(self, tes_data): + def convert_to_wrroc(self, tes_data: dict) -> dict: try: validated_tes_data = TESData(**tes_data) except ValidationError as e: @@ -23,22 +24,22 @@ def convert_to_wrroc(self, tes_data): } return wrroc_data - def convert_from_wrroc(self, wrroc_data): + def convert_from_wrroc(self, data: dict) -> dict: try: - # Filter only the fields relevant to WRROCData - wrroc_filtered_data = {key: wrroc_data.get(key) for key in WRROCData.__fields__ if key in wrroc_data} - validated_wrroc_data = WRROCData(**wrroc_filtered_data) + data_validated = validate_wrroc_tes(data) + data_filtered = {key: data.get(key) for key in WRROCDataTES.__fields__ if key in data} + data_validated = WRROCDataTES(**data_filtered) except ValidationError as e: raise ValueError(f"Invalid WRROC data: {e}") tes_data = { - "id": validated_wrroc_data.id, - "name": validated_wrroc_data.name, - "description": validated_wrroc_data.description, - "executors": [{"image": validated_wrroc_data.instrument}], - "inputs": [{"url": obj.id, "path": obj.name} for obj in validated_wrroc_data.object], - "outputs": [{"url": res.id, "path": res.name} for res in validated_wrroc_data.result], - "creation_time": validated_wrroc_data.startTime, - "logs": [{"end_time": validated_wrroc_data.endTime}], + "id": data_validated.id, + "name": data_validated.name, + "description": data_validated.description, + "executors": [{"image": data_validated.instrument}], + "inputs": [{"url": obj.id, "path": obj.name} for obj in data_validated.object], + "outputs": [{"url": res.id, "path": res.name} for res in data_validated.result], + "creation_time": data_validated.startTime, + "logs": [{"end_time": data_validated.endTime}], } - return tes_data + return tes_data \ No newline at end of file diff --git a/crategen/converters/wes_converter.py b/crategen/converters/wes_converter.py index 4381b93..7f7ac0f 100644 --- a/crategen/converters/wes_converter.py +++ b/crategen/converters/wes_converter.py @@ -2,10 +2,11 @@ from .abstract_converter import AbstractConverter from ..models import WESData, WRROCDataWES from .utils import convert_to_iso8601 +from ..validators import validate_wrroc class WESConverter(AbstractConverter): - def convert_to_wrroc(self, wes_data): + def convert_to_wrroc(self, wes_data: dict) -> dict: try: wes_model = WESData(**wes_data) except ValidationError as e: @@ -22,29 +23,25 @@ def convert_to_wrroc(self, wes_data): } return wrroc_data - def convert_from_wrroc(self, wrroc_data): - allowed_fields = set(WRROCDataWES.__fields__.keys()) - unexpected_fields = set(wrroc_data.keys()) - allowed_fields - - if unexpected_fields: - raise ValueError(f"Unexpected fields in WRROC data: {unexpected_fields}") + def convert_from_wrroc(self, data: dict) -> dict: try: - wrroc_filtered_data = {key: wrroc_data.get(key) for key in WRROCDataWES.__fields__} - wrroc_model = WRROCDataWES(**wrroc_filtered_data) + data_validated = validate_wrroc(data, WRROCDataWES) + allowed_fields = set(WRROCDataWES.__fields__.keys()) + unexpected_fields = set(data.keys()) - allowed_fields + if unexpected_fields: + raise ValueError(f"Unexpected fields in WRROC data: {unexpected_fields}") except ValidationError as e: raise ValueError(f"Invalid WRROC data: {e}") - result_data = wrroc_model.result - wes_data = { - "run_id": wrroc_model.id, + "run_id": data_validated.id, "run_log": { - "name": wrroc_model.name, - "start_time": wrroc_model.startTime, - "end_time": wrroc_model.endTime, + "name": data_validated.name, + "start_time": data_validated.startTime, + "end_time": data_validated.endTime, }, - "state": wrroc_model.status, - "outputs": [{"location": res.id, "name": res.name} for res in result_data], + "state": data_validated.status, + "outputs": [{"location": res.id, "name": res.name} for res in data_validated.result], } - return wes_data + return wes_data \ No newline at end of file diff --git a/crategen/models.py b/crategen/models.py index e35bb89..8e92104 100644 --- a/crategen/models.py +++ b/crategen/models.py @@ -1,9 +1,9 @@ -from pydantic import BaseModel, Field, validator, root_validator -from typing import List, Optional, Dict +from pydantic import BaseModel +from typing import Optional class Executor(BaseModel): image: str - command: List[str] + command: list[str] class TESInputs(BaseModel): url: str @@ -20,11 +20,11 @@ class TESData(BaseModel): id: str name: str description: Optional[str] = "" - executors: List[Executor] - inputs: List[TESInputs] - outputs: List[TESOutputs] + executors: list[Executor] + inputs: list[TESInputs] + outputs: list[TESOutputs] creation_time: str - logs: List[TESLogs] + logs: list[TESLogs] class Config: extra = "forbid" @@ -33,7 +33,7 @@ class WESRunLog(BaseModel): name: Optional[str] = None start_time: Optional[str] = None end_time: Optional[str] = None - cmd: Optional[List[str]] = None + cmd: Optional[list[str]] = None stdout: Optional[str] = None stderr: Optional[str] = None exit_code: Optional[int] = None @@ -43,18 +43,18 @@ class WESOutputs(BaseModel): name: str class WESRequest(BaseModel): - workflow_params: Dict[str, str] + workflow_params: dict[str, str] workflow_type: str workflow_type_version: str - tags: Optional[Dict[str, str]] = None + tags: Optional[dict[str, str]] = None class WESData(BaseModel): run_id: str request: WESRequest state: str run_log: WESRunLog - task_logs: Optional[List[WESRunLog]] = None - outputs: List[WESOutputs] + task_logs: Optional[list[WESRunLog]] = None + outputs: list[WESOutputs] class Config: extra = "forbid" @@ -72,8 +72,21 @@ class WRROCData(BaseModel): name: str description: Optional[str] = "" instrument: Optional[str] = None - object: List[WRROCInputs] - result: List[WRROCOutputs] + object: list[WRROCInputs] + result: list[WRROCOutputs] + startTime: Optional[str] = None + endTime: Optional[str] = None + + class Config: + extra = "forbid" + +class WRROCDataTES(BaseModel): + id: str + name: str + description: Optional[str] = "" + instrument: Optional[str] = None + object: list[WRROCInputs] + result: list[WRROCOutputs] startTime: Optional[str] = None endTime: Optional[str] = None @@ -84,7 +97,7 @@ class WRROCDataWES(BaseModel): id: str name: str status: str - result: List[WRROCOutputs] + result: list[WRROCOutputs] startTime: Optional[str] = None endTime: Optional[str] = None diff --git a/crategen/validators.py b/crategen/validators.py new file mode 100644 index 0000000..31b31f3 --- /dev/null +++ b/crategen/validators.py @@ -0,0 +1,27 @@ +from pydantic import ValidationError +from .models import WRROCData, WRROCDataTES, WRROCDataWES + +def validate_wrroc(data: dict, target_model) -> WRROCData: + """ + Validate that the input data is a valid WRROC entity. + + Args: + data (dict): The input data to validate. + target_model (Type[BaseModel]): The target Pydantic model to validate against. + + Returns: + WRROCData: The validated WRROC data. + + Raises: + ValueError: If the data is not valid WRROC data. + """ + try: + return target_model(**data) + except ValidationError as e: + raise ValueError(f"Invalid WRROC data: {e}") + +def validate_wrroc_tes(data: dict) -> WRROCDataTES: + return validate_wrroc(data, WRROCDataTES) + +def validate_wrroc_wes(data: dict) -> WRROCDataWES: + return validate_wrroc(data, WRROCDataWES) diff --git a/tests/unit/test_wes_converter.py b/tests/unit/test_wes_converter.py index aa2a038..6dbd483 100644 --- a/tests/unit/test_wes_converter.py +++ b/tests/unit/test_wes_converter.py @@ -1,7 +1,6 @@ import unittest from crategen.converters.wes_converter import WESConverter - class TestWESConverter(unittest.TestCase): def setUp(self): self.converter = WESConverter() @@ -60,7 +59,7 @@ def test_convert_from_wrroc_invalid_nested_structure(self): } with self.assertRaises(ValueError) as context: self.converter.convert_from_wrroc(wrroc_data) - self.assertIn("Unexpected fields in WRROC data", str(context.exception)) + self.assertIn("extra fields not permitted", str(context.exception)) def test_convert_from_wrroc_missing_fields(self): wrroc_data = { @@ -92,6 +91,5 @@ def test_convert_to_wrroc_missing_fields(self): self.converter.convert_to_wrroc(wes_data) self.assertIn("Invalid WES data", str(context.exception)) - if __name__ == "__main__": unittest.main() From 1300eb3d8c71374ad6aa5dc1edb13b400a8249dc Mon Sep 17 00:00:00 2001 From: karanjot786 Date: Tue, 6 Aug 2024 20:32:40 +0530 Subject: [PATCH 6/6] test(tes,wes): add comprehensive unit tests --- crategen/converters/tes_converter.py | 4 +-- crategen/converters/wes_converter.py | 10 +++---- crategen/validators.py | 39 ++++++++++++++++++++++++---- 3 files changed, 38 insertions(+), 15 deletions(-) diff --git a/crategen/converters/tes_converter.py b/crategen/converters/tes_converter.py index 6a098a8..d5a26b5 100644 --- a/crategen/converters/tes_converter.py +++ b/crategen/converters/tes_converter.py @@ -1,7 +1,7 @@ from pydantic import ValidationError from .abstract_converter import AbstractConverter from .utils import convert_to_iso8601 -from ..models import TESData, WRROCDataTES +from ..models import TESData from ..validators import validate_wrroc_tes class TESConverter(AbstractConverter): @@ -27,8 +27,6 @@ def convert_to_wrroc(self, tes_data: dict) -> dict: def convert_from_wrroc(self, data: dict) -> dict: try: data_validated = validate_wrroc_tes(data) - data_filtered = {key: data.get(key) for key in WRROCDataTES.__fields__ if key in data} - data_validated = WRROCDataTES(**data_filtered) except ValidationError as e: raise ValueError(f"Invalid WRROC data: {e}") diff --git a/crategen/converters/wes_converter.py b/crategen/converters/wes_converter.py index 7f7ac0f..ccec409 100644 --- a/crategen/converters/wes_converter.py +++ b/crategen/converters/wes_converter.py @@ -1,8 +1,8 @@ from pydantic import ValidationError from .abstract_converter import AbstractConverter -from ..models import WESData, WRROCDataWES +from ..models import WESData from .utils import convert_to_iso8601 -from ..validators import validate_wrroc +from ..validators import validate_wrroc_wes class WESConverter(AbstractConverter): @@ -26,11 +26,7 @@ def convert_to_wrroc(self, wes_data: dict) -> dict: def convert_from_wrroc(self, data: dict) -> dict: try: - data_validated = validate_wrroc(data, WRROCDataWES) - allowed_fields = set(WRROCDataWES.__fields__.keys()) - unexpected_fields = set(data.keys()) - allowed_fields - if unexpected_fields: - raise ValueError(f"Unexpected fields in WRROC data: {unexpected_fields}") + data_validated = validate_wrroc_wes(data) except ValidationError as e: raise ValueError(f"Invalid WRROC data: {e}") diff --git a/crategen/validators.py b/crategen/validators.py index 31b31f3..845b00c 100644 --- a/crategen/validators.py +++ b/crategen/validators.py @@ -1,13 +1,12 @@ from pydantic import ValidationError from .models import WRROCData, WRROCDataTES, WRROCDataWES -def validate_wrroc(data: dict, target_model) -> WRROCData: +def validate_wrroc(data: dict) -> WRROCData: """ Validate that the input data is a valid WRROC entity. Args: data (dict): The input data to validate. - target_model (Type[BaseModel]): The target Pydantic model to validate against. Returns: WRROCData: The validated WRROC data. @@ -16,12 +15,42 @@ def validate_wrroc(data: dict, target_model) -> WRROCData: ValueError: If the data is not valid WRROC data. """ try: - return target_model(**data) + return WRROCData(**data) except ValidationError as e: raise ValueError(f"Invalid WRROC data: {e}") def validate_wrroc_tes(data: dict) -> WRROCDataTES: - return validate_wrroc(data, WRROCDataTES) + """ + Validate that the input data is a valid WRROC entity for TES. + + Args: + data (dict): The input data to validate. + + Returns: + WRROCDataTES: The validated WRROC data for TES. + + Raises: + ValueError: If the data is not valid WRROC data for TES. + """ + try: + return WRROCDataTES(**data) + except ValidationError as e: + raise ValueError(f"Invalid WRROC data: {e}") def validate_wrroc_wes(data: dict) -> WRROCDataWES: - return validate_wrroc(data, WRROCDataWES) + """ + Validate that the input data is a valid WRROC entity for WES. + + Args: + data (dict): The input data to validate. + + Returns: + WRROCDataWES: The validated WRROC data for WES. + + Raises: + ValueError: If the data is not valid WRROC data for WES. + """ + try: + return WRROCDataWES(**data) + except ValidationError as e: + raise ValueError(f"Invalid WRROC data: {e}")