diff --git a/py_mmd_tools/mmd_operations.py b/py_mmd_tools/mmd_operations.py index 21663ec0..2913e15a 100644 --- a/py_mmd_tools/mmd_operations.py +++ b/py_mmd_tools/mmd_operations.py @@ -18,6 +18,8 @@ import warnings import datetime_glob +import urllib.parse + def add_metadata_update_info(f, note, type="Minor modification"): """ Add update information """ @@ -34,15 +36,19 @@ def check_csw_catalog(ds_id, nc_file, urls, env, emsg=""): """Search for the dataset with id 'ds_id' in the CSW metadata catalog. """ + payload = { + "service": "CSW", + "version": "2.0.2", + "request": "GetRepositoryItem", + "id": ds_id} + + payload_str = urllib.parse.urlencode(payload, safe=":") + ds_found_and_accessible = False res = requests.get(url=f"https://{urls[env]['csw']}/csw", - params={ - "service": "CSW", - "version": "2.0.2", - "request": "GetRepositoryItem", - "id": ds_id}) + params=payload_str) # TODO: check the data_access urls - if res.status_code == 200: + if res.status_code == 200 and "ExceptionText" not in res.text: ds_found_and_accessible = True else: emsg += f"Could not find dataset in CSW catalog: {nc_file} (id: {ds_id})" @@ -183,10 +189,10 @@ def move_data(mmd_repository_path, old_file_location_base, new_file_location_bas # Update with dmci update dmci_updated = False - if res.status_code == 200 and not dry_run: + if res.status_code == 200 and "OK" in res.text and not dry_run: # be careful with this... res = requests.post(url=f"https://{urls[env]['dmci']}/v1/update", data=data) - if res.status_code == 200: + if res.status_code == 200 and "OK" in res.text: # This should be the case for a dry-run and a valid xml dmci_updated = True else: @@ -199,7 +205,7 @@ def move_data(mmd_repository_path, old_file_location_base, new_file_location_bas elif dmci_updated and dry_run: nc_moved = True - ds_id = f"no.met.{urls[env]['id_namespace']}:{os.path.basename(mmd_orig).split('.')[0]}" + ds_id = f"{urls[env]['id_namespace']}:{os.path.basename(mmd_orig).split('.')[0]}" if not dry_run: ds_found_and_accessible, emsg = check_csw_catalog(ds_id, nc_file, urls, env, emsg=emsg) else: diff --git a/tests/test_mmd_operations.py b/tests/test_mmd_operations.py index 1ae8a90b..3e1306a1 100644 --- a/tests/test_mmd_operations.py +++ b/tests/test_mmd_operations.py @@ -101,6 +101,7 @@ def mock_walk(*a, **k): class MockResponse: status_code = 200 + text = "OK" # Test check for environment in move_data function with pytest.raises(ValueError): @@ -295,6 +296,7 @@ class MockResponse: class MockResponse2: status_code = 200 + text = "" with monkeypatch.context() as mp: mp.setattr("py_mmd_tools.mmd_operations.requests.get", @@ -304,6 +306,30 @@ class MockResponse2: assert msg == "" +@pytest.mark.online +def test_check_dataset_in_met_csw_catalog(): + """Check that a known dataset is found. + """ + ds_id = "no.met:806070da-e9f3-4d03-ba1d-26b843961634" + # Leads to internal server error: + # ds_id = "no.met:aaaffc75-a42f-4bd8-a1f5-c8e8774fd948" + # url: + # "https://data.csw.met.no/csw?service=CSW&version=2.0.2" + # "&request=GetRepositoryItem&id=no.met:aaaffc75-a42f-4bd8-a1f5-c8e8774fd948" + nc_file = "ncfile.nc" + urls = {"prod": {"dmci": "dmci.s-enda.k8s.met.no", + "csw": "data.csw.met.no", + "id_namespace": "no.met"}} + env = "prod" + found, msg = check_csw_catalog(ds_id, nc_file, urls, env) + assert found is True + assert msg == "" + + ds_id = "rubbish" + found, msg = check_csw_catalog(ds_id, nc_file, urls, env) + assert found is False + + @pytest.mark.py_mmd_tools def test_move_data_file(monkeypatch): """Test move_data_file function can't move (working move is diff --git a/tests/test_move_data_script.py b/tests/test_move_data_script.py index 7003d71c..c5f1cabd 100644 --- a/tests/test_move_data_script.py +++ b/tests/test_move_data_script.py @@ -32,6 +32,7 @@ def test_main(dataDir, monkeypatch): class MockResponse: status_code = 200 + text = "OK" with monkeypatch.context() as mp: mp.setattr("py_mmd_tools.mmd_operations.os.path.isdir",