Cherrypicks from 1.8 #1637

Merged · 11 commits · Sep 20, 2024
4 changes: 4 additions & 0 deletions .github/dependabot.yml
@@ -9,3 +9,7 @@ updates:
directory: "/" # Location of package manifests
schedule:
interval: "weekly"
- package-ecosystem: docker
directory: "/docker"
schedule:
interval: "daily"
10 changes: 5 additions & 5 deletions .github/workflows/doc-qa.yaml
@@ -1,12 +1,12 @@
name: Doc QA
on:
# Triggers the workflow on pull request events for the main branch
pull_request:
branches:
- 'develop'
pull_request_target:
types:
- opened

# Allows you to run this workflow manually from the Actions tab
workflow_dispatch:
permissions:
pull-requests: write

jobs:
pyspell:
6 changes: 3 additions & 3 deletions datacube/drivers/postgis/_api.py
@@ -1525,19 +1525,19 @@ def temporal_extent_by_prod(self, product_id: int) -> tuple[datetime.datetime, d
query = self.temporal_extent_full().where(Dataset.product_ref == product_id)
res = self._connection.execute(query)
for tmin, tmax in res:
return (self.time_min.normalise_value(tmin), self.time_max.normalise_value(tmax))
return (time_min.normalise_value(tmin), time_max.normalise_value(tmax))
Contributor: Wasn't this change the other way around?

Contributor (author): Yep, totally muffed that merge. Sorry, my mind was elsewhere. This should fix it.

raise RuntimeError("Product has no datasets and therefore no temporal extent")

def temporal_extent_by_ids(self, ids: Iterable[DSID]) -> tuple[datetime.datetime, datetime.datetime]:
query = self.temporal_extent_full().where(Dataset.id.in_(ids))
res = self._connection.execute(query)
for tmin, tmax in res:
return (self.time_min.normalise_value(tmin), self.time_max.normalise_value(tmax))
return (time_min.normalise_value(tmin), time_max.normalise_value(tmax))
raise ValueError("no dataset ids provided")

def temporal_extent_full(self) -> Select:
# Hardcode eo3 standard time locations - do not use this approach in a legacy index driver.

return select(
func.min(self.time_min.alchemy_expression), func.max(self.time_max.alchemy_expression)
func.min(time_min.alchemy_expression), func.max(time_max.alchemy_expression)
)
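The review exchange above is easier to follow with a toy model of the pattern involved. This is only a sketch built on an assumption inferred from the diff — that `time_min` and `time_max` are module-level field helpers in the postgis driver rather than attributes of the API object, which is why the fix drops the `self.` prefix. None of the names below are real datacube identifiers.

```python
class _TimeField:
    """Hypothetical stand-in for the driver's datetime range field helper."""

    def normalise_value(self, value):
        # The real helper coerces database values to datetimes; this stand-in
        # just passes the value through.
        return value


# Module-level helpers, analogous to how the eo3 time range fields appear to be defined.
time_min = _TimeField()
time_max = _TimeField()


class PostgisApiSketch:
    """Has no time_min/time_max attributes, mirroring the API object after the fix."""

    def temporal_extent(self, tmin, tmax):
        # Post-fix form: reference the module-level helpers directly.
        # Using self.time_min here (the pre-fix form) would raise AttributeError.
        return (time_min.normalise_value(tmin), time_max.normalise_value(tmax))


print(PostgisApiSketch().temporal_extent("2020-01-01", "2020-12-31"))
```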
20 changes: 13 additions & 7 deletions datacube/index/postgis/_datasets.py
@@ -161,13 +161,19 @@ def add(self, dataset: Dataset,
return dataset
with self._db_connection(transaction=True) as transaction:
# 1a. insert (if not already exists)
transaction.insert_dataset(dataset.metadata_doc_without_lineage(), dataset.id, dataset.product.id)
# 1b. Prepare spatial index extents
transaction.update_spindex(dsids=[dataset.id])
transaction.update_search_index(dsids=[dataset.id])
# 1c. Store locations
if dataset.uri is not None:
self._ensure_new_locations(dataset, transaction=transaction)
product_id = dataset.product.id
if product_id is None:
# don't assume the product has an id value since it's optional
# but we should error if the product doesn't exist in the db
product_id = self.products.get_by_name_unsafe(dataset.product.name).id
is_new = transaction.insert_dataset(dataset.metadata_doc_without_lineage(), dataset.id, product_id)
if is_new:
# 1b. Prepare spatial index extents
transaction.update_spindex(dsids=[dataset.id])
transaction.update_search_index(dsids=[dataset.id])
# 1c. Store locations
if dataset.uris is not None:
self._ensure_new_locations(dataset, transaction=transaction)
if archive_less_mature is not None:
self.archive_less_mature(dataset, archive_less_mature)
if dataset.source_tree is not None:
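The new branch in `add()` above resolves a missing product id by name before inserting. Here is a minimal sketch of that fallback, pulled out into a standalone function purely for illustration — in the PR the logic is inlined, and only `get_by_name_unsafe` is a real call taken from the diff.

```python
def resolve_product_id(dataset, products):
    """Return the indexed id of the dataset's product, looking it up by name if needed."""
    product_id = dataset.product.id
    if product_id is None:
        # The in-memory Product was never loaded from the index, so it carries no id.
        # get_by_name_unsafe raises for an unknown product, preserving the
        # "error if the product doesn't exist in the db" behaviour noted in the comment.
        product_id = products.get_by_name_unsafe(dataset.product.name).id
    return product_id
```

The integration test added later in this PR (`test_add_dataset_no_product_id`) exercises exactly this path with a `Product` constructed directly from a definition.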
7 changes: 6 additions & 1 deletion datacube/index/postgres/_datasets.py
@@ -159,7 +159,12 @@ def process_bunch(dss, main_ds, transaction):

# First insert all new datasets
for ds in dss:
is_new = transaction.insert_dataset(ds.metadata_doc_without_lineage(), ds.id, ds.product.id)
product_id = ds.product.id
if product_id is None:
# don't assume the product has an id value since it's optional
# but we should error if the product doesn't exist in the db
product_id = self.products.get_by_name_unsafe(ds.product.name).id
is_new = transaction.insert_dataset(ds.metadata_doc_without_lineage(), ds.id, product_id)
sources = ds.sources
if is_new and sources is not None:
edges.extend((name, ds.id, src.id)
13 changes: 6 additions & 7 deletions docker/Dockerfile
@@ -5,8 +5,8 @@
## SPDX-License-Identifier: Apache-2.0
##
# gdal:ubuntu-small no longer comes with netcdf support compiled into gdal
FROM ghcr.io/osgeo/gdal:ubuntu-full-3.9.0 as builder
FROM ghcr.io/osgeo/gdal:ubuntu-full-3.9.0
FROM ghcr.io/osgeo/gdal:ubuntu-full-3.9.2 AS builder
FROM ghcr.io/osgeo/gdal:ubuntu-full-3.9.2
ARG V_PG=16
ARG V_PGIS=16-postgis-3

@@ -34,15 +34,14 @@ RUN apt-get update -y \
# Build constrained python environment

RUN virtualenv /env
ENV PYENV /env
ENV GDAL_CONFIG /usr/bin/gdal-config
# Set the locale, this is required for some of the Python packages
ENV PYENV=/env \
GDAL_CONFIG=/usr/bin/gdal-config \
LC_ALL=C.UTF-8

# Needed to build cf-units wheels.
ARG UDUNITS2_XML_PATH=/usr/share/xml/udunits/udunits2-common.xml

# Set the locale, this is required for some of the Python packages
ENV LC_ALL C.UTF-8

COPY docker/constraints.in /conf/requirements.txt
COPY docker/constraints.txt docker/nobinary.txt /conf/

52 changes: 45 additions & 7 deletions docs/README.rst
@@ -8,22 +8,60 @@ Developing Locally

Requires a Unix like system that includes ``make``.

#. Install NodeJS + NPM
#. Install Browser Sync
#. Clone the datacube-core repository. If you don't have permissions to push to the datacube-core library, you will need to fork the repo and clone your fork.

.. code-block:: bash

npm install -g browser-sync
git clone https://github.com/opendatacube/datacube-core.git

#. Install Python dependencies
#. Check out a new branch for the documentation feature you're working on

.. code-block:: bash

pip install -r requirements.txt
pip install git+https://github.com/carrotandcompany/sphinx-autobuild.git@feature_event_delay
git switch -c docs-<feature>

#. Start the auto-building development server.
#. Change directory to the docs folder

.. code-block:: bash

cd docs

#. Create a conda environment for python 3.11, with conda-forge as the channel

.. code-block:: bash

conda create --name datacubecoredocs -c conda-forge python=3.11

#. Activate the conda environment

.. code-block:: bash

conda activate datacubecoredocs

#. Install pandoc

.. code-block:: bash

conda install pandoc

#. Install requirements with pip

.. code-block:: bash

pip install -r requirements.txt

#. Run the autobuild.

.. code-block:: bash

sphinx-autobuild . _build/html

#. Open a browser and navigate to the URL provided by the autobuild

#. Make changes to the docs. The terminal with the autobuild will continue to update the docs view in the browser.

#. When finished, quit the autobuild process using ``ctrl-c`` in the terminal.

#. Stage and commit your changes.

#. When ready for review, push your changes and create a pull request.
2 changes: 2 additions & 0 deletions docs/about/whats_new.rst
@@ -19,6 +19,7 @@ v1.9.next
- Drop valid-area check and anti-meridian fix 3857 extents (:pull:1635)
- Remove problematic "common_timestamp" postgresql function from postgis driver. Some internal API changes
required to accommodate and preserve all previous index-driver level behaviour. (:pull:`1623`)
- Cherry picks from 1.8 (#1624-#1626, #1629-#1631) (:pull:`1637`)

v1.9.0-rc9 (3rd July 2024)
==========================
@@ -137,6 +138,7 @@ v1.9.0-rc1 (27th March 2024)

v1.8.next
=========
- Don't error when adding a dataset whose product doesn't have an id value (:pull:`1630`)

v1.8.19 (2nd July 2024)
=======================
2 changes: 1 addition & 1 deletion docs/requirements.txt
@@ -177,7 +177,7 @@ platformdirs==3.0.0
# via jupyter-core
psutil==5.9.4
# via distributed
psycopg2==2.9.5
#psycopg2==2.9.5
# via datacube (setup.py)
pydata-sphinx-theme==0.9.0
# via datacube (setup.py)
11 changes: 10 additions & 1 deletion integration_tests/index/test_index_data.py
@@ -18,7 +18,8 @@

from datacube.index.exceptions import MissingRecordError
from datacube.index import Index
from datacube.model import Dataset, MetadataType
from datacube.model import Dataset, Product, MetadataType
from datacube.index.eo3 import prep_eo3

_telemetry_uuid = UUID('4ec8fe97-e8b9-11e4-87ff-1040f381a756')
_telemetry_dataset = {
@@ -258,6 +259,14 @@ def test_get_dataset(index: Index, ls8_eo3_dataset: Dataset) -> None:
'f226a278-e422-11e6-b501-185e0f80a5c1']) == []


def test_add_dataset_no_product_id(index: Index, extended_eo3_metadata_type, ls8_eo3_product, eo3_ls8_dataset_doc):
product_no_id = Product(extended_eo3_metadata_type, ls8_eo3_product.definition)
assert product_no_id.id is None
dataset_doc, _ = eo3_ls8_dataset_doc
dataset = Dataset(product_no_id, prep_eo3(dataset_doc))
assert index.datasets.add(dataset, with_lineage=False)


def test_transactions_api_ctx_mgr(index,
extended_eo3_metadata_type_doc,
ls8_eo3_product,
3 changes: 3 additions & 0 deletions tests/index/test_api_index_dataset.py
@@ -208,6 +208,9 @@ def get(self, *args, **kwargs):
def get_by_name(self, *args, **kwargs):
return self.type

def get_by_name_unsafe(self, *args, **kwargs):
return self.type

@contextmanager
def _db_connection(self, transaction=False):
yield MockDb()