Warn if non-eo3 dataset has eo3 metadata type (#1523)
* warn if non-eo3 dataset has eo3 metadata type

* fix str contains

* add test

---------

Co-authored-by: Ariana Barzinpour <[email protected]>
Ariana-B and Ariana Barzinpour authored Dec 19, 2023
1 parent b872aff commit 6aaea5f
Showing 3 changed files with 40 additions and 0 deletions.
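The change targets dataset documents that were written as eo3 but omit the $schema marker field. As a rough illustration (a minimal sketch; the field values below are assumed, not taken from this commit), the only thing the new check cares about is whether that one key is present:

# Illustrative only: the two document shapes this commit distinguishes.
# The schema URL is the one eo3 documents normally declare; ids and names here are made up.
eo3_doc = {
    "$schema": "https://schemas.opendatacube.org/dataset",
    "id": "00000000-0000-0000-0000-000000000000",  # hypothetical id
    "product": {"name": "some_eo3_product"},       # hypothetical product name
    # ... crs, grids, properties, measurements ...
}

# Same content with the marker dropped: indexing this against a product whose
# metadata type name contains "eo3" now triggers the new warning.
unmarked_doc = {k: v for k, v in eo3_doc.items() if k != "$schema"}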
13 changes: 13 additions & 0 deletions datacube/index/hl.py
@@ -5,6 +5,8 @@
"""
High level indexing operations/utilities
"""
import logging

import json
import toolz
from uuid import UUID
@@ -17,6 +19,8 @@
from datacube.utils.changes import get_doc_changes
from .eo3 import prep_eo3, is_doc_eo3, is_doc_geo # type: ignore[attr-defined]

_LOG = logging.getLogger(__name__)


class ProductRule:
def __init__(self, product: Product, signature: Mapping[str, Any]):
@@ -148,13 +152,21 @@ def dataset_resolver(index: AbstractIndex,
skip_lineage: bool = False) -> Callable[[SimpleDocNav, str], DatasetOrError]:
match_product = product_matcher(product_matching_rules)

def check_intended_eo3(ds: SimpleDocNav, product: Product) -> None:
# warn if it looks like dataset was meant to be eo3 but is not
if not is_doc_eo3(ds.doc) and ("eo3" in product.metadata_type.name):
_LOG.warning(f"Dataset {ds.id} has a product with an eo3 metadata type, "
"but the dataset definition does not include the $schema field "
"and so will not be recognised as an eo3 dataset.")

def resolve_no_lineage(ds: SimpleDocNav, uri: str) -> DatasetOrError:
doc = ds.doc_without_lineage_sources
try:
product = match_product(doc)
except BadMatch as e:
return None, e

check_intended_eo3(ds, product)
return Dataset(product, doc, uris=[uri], sources={}), None

def resolve(main_ds_doc: SimpleDocNav, uri: str) -> DatasetOrError:
@@ -222,6 +234,7 @@ def resolve_ds(ds: SimpleDocNav,
else:
product = match_product(doc)

check_intended_eo3(ds, product)
return with_cache(Dataset(product, doc, uris=uris, sources=sources), ds.id, cache)
try:
return remap_lineage_doc(main_ds, resolve_ds, cache={}), None
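The condition uses substring containment ("eo3" in product.metadata_type.name) rather than an exact match, which is most likely what the "fix str contains" bullet in the commit message refers to; it therefore also covers eo3-derived metadata types. A small sketch of what that containment test matches, with the example names assumed rather than drawn from an index:

# Assumed example metadata type names; only the containment logic mirrors the commit.
names = ["eo", "eo3", "eo3_landsat_ard", "telemetry"]

for name in names:
    print(f"{name}: {'eo3' in name}")
# eo: False
# eo3: True
# eo3_landsat_ard: True
# telemetry: False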
1 change: 1 addition & 0 deletions docs/about/whats_new.rst
@@ -9,6 +9,7 @@ v1.8.next
=========
- Add dataset cli tool ``find-duplicates`` to identify duplicate indexed datasets (:pull:`1517`)
- Make solar_day() timezone aware (:pull:`1521`)
- Warn if non-eo3 dataset has eo3 metadata type (:pull:`1523`)

v1.8.17 (8th November 2023)
===========================
26 changes: 26 additions & 0 deletions integration_tests/test_dataset_add.py
@@ -7,6 +7,7 @@
import pytest
import toolz
import yaml
import logging

from datacube.index import Index
from datacube.index.hl import Doc2Dataset
@@ -15,6 +16,8 @@
from datacube.utils import SimpleDocNav
from datacube.scripts.dataset import _resolve_uri

logger = logging.getLogger(__name__)


def check_skip_lineage_test(clirunner, index):
ds = SimpleDocNav(gen_dataset_test_dag(11, force_tree=True))
@@ -206,6 +209,29 @@ def test_dataset_add_no_id(dataset_add_configs, index_empty, clirunner):
assert _err == 'No id defined in dataset doc'


@pytest.mark.parametrize('datacube_env_name', ('datacube', ))
def test_dataset_eo3_no_schema(dataset_add_configs, index_empty, clirunner, caplog):
p = dataset_add_configs
index = index_empty
ds = load_dataset_definition(p.datasets_eo3).doc

clirunner(['metadata', 'add', p.metadata])
clirunner(['product', 'add', p.products])

# no warnings if eo3 dataset has $schema
doc2ds = Doc2Dataset(index)
doc2ds(ds, 'file:///something')
warnings = [record for record in caplog.records if record.levelno == logging.WARNING]
assert len(warnings) == 0

# warn if eo3 metadata type but no $schema
del ds["$schema"]
doc2ds(ds, 'file:///something')
warnings = [record for record in caplog.records if record.levelno == logging.WARNING]
assert len(warnings) == 1
assert "will not be recognised as an eo3 dataset" in warnings[0].msg


# Current formulation of this test relies on non-EO3 test data
@pytest.mark.parametrize('datacube_env_name', ('datacube', ))
def test_dataset_add(dataset_add_configs, index_empty, clirunner):
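Outside the test suite, the warning surfaces through the standard logging machinery whenever a document is resolved via Doc2Dataset. A hedged sketch of observing it programmatically, assuming a configured datacube environment; the document below is a placeholder, not a complete eo3 definition:

import logging

from datacube import Datacube
from datacube.index.hl import Doc2Dataset

logging.basicConfig(level=logging.WARNING)  # make datacube.index.hl warnings visible

dc = Datacube()                 # assumes a configured datacube environment
doc2ds = Doc2Dataset(dc.index)

# Placeholder document: a real one carries full eo3 content, minus "$schema",
# and matches a product whose metadata type name contains "eo3".
dataset_doc = {
    "id": "00000000-0000-0000-0000-000000000000",
    "product": {"name": "some_eo3_product"},  # hypothetical product name
}

dataset, err = doc2ds(dataset_doc, "file:///path/to/dataset.yaml")
# Expected: a WARNING ending in "will not be recognised as an eo3 dataset." is logged,
# and resolution otherwise proceeds through the usual product-matching rules.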
