-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
12 changed files
with
272 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
../rico_hdl/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,9 @@ | ||
# I am kinda duplicating code. | ||
# The main goal of these tests is to ensure that the data is not empty | ||
# which may happen over time. | ||
# And that the data can be loaded from the LMDB afterwards without any issues. | ||
# This should ensure that the safetensor format also remains functional and reproducible | ||
# But I do not think that I have to check if all encoded arrays remain identical for all datasets. | ||
import lmdb | ||
import rasterio | ||
import numpy as np | ||
|
@@ -7,6 +13,7 @@ | |
import pytest | ||
import subprocess | ||
import hashlib | ||
from rico_hdl.rico_hdl import EUROSAT_MS_BANDS | ||
|
||
|
||
def read_single_band_raster(path): | ||
|
@@ -59,6 +66,15 @@ def uc_merced_root() -> Path: | |
return p | ||
|
||
|
||
@pytest.fixture(scope="session")
def eurosat_ms_root() -> Path:
    """
    Return the directory that holds the EuroSAT multi-spectral test TIFFs.

    The location is read from the `RICO_HDL_EUROSAT_MS_PATH` environment
    variable; if it is unset or empty the bundled `./tiffs/EUROSAT_MS/`
    directory is used instead. The fixture fails if the path is missing
    or is not a directory.
    """
    root = Path(os.environ.get("RICO_HDL_EUROSAT_MS_PATH") or "./tiffs/EUROSAT_MS/")
    assert root.exists()
    assert root.is_dir()
    return root
|
||
|
||
# https://docs.pytest.org/en/6.2.x/tmpdir.html#the-tmpdir-factory-fixture@pytest.fixture(scope="session") | ||
@pytest.fixture | ||
def encoded_bigearthnet_s1_s2_path(s1_root, s2_root, tmpdir_factory) -> Path: | ||
|
@@ -107,6 +123,21 @@ def encoded_uc_merced_path(uc_merced_root, tmpdir_factory) -> Path: | |
return Path(tmp_path) | ||
|
||
|
||
@pytest.fixture
def encoded_eurosat_ms_path(eurosat_ms_root, tmpdir_factory) -> Path:
    """
    Encode the EuroSAT MS test data with the `rico-hdl` CLI and return
    the directory the encoder wrote to.

    A fresh temporary directory is created for every test that requests
    this fixture. A non-zero exit code of the CLI raises
    `subprocess.CalledProcessError`.
    """
    target_dir = tmpdir_factory.mktemp("eurosat_ms_lmdb")
    command = [
        "rico-hdl",
        "eurosat-multi-spectral",
        f"--dataset-dir={eurosat_ms_root}",
        f"--target-dir={target_dir}",
    ]
    subprocess.run(command, check=True)
    return Path(target_dir)
|
||
|
||
def test_bigearthnet_integration( | ||
s1_root, s2_root, encoded_bigearthnet_s1_s2_path, bigearthnet_lmdb_ref_path | ||
): | ||
|
@@ -225,3 +256,42 @@ def test_uc_merced_integration(uc_merced_root, encoded_uc_merced_path): | |
np.array_equal(source_data, decoded_dict[source_key]) | ||
for decoded_dict in decoded_dicts | ||
), f"Couldn't find data in the LMDB database that matches the data from: {source_file}:{source_key}" | ||
|
||
|
||
def read_all_eurosat_ms_bands(path):
    """
    Read every band of the TIFF file at `path` and return them as a
    dictionary keyed by the entries of `EUROSAT_MS_BANDS`.

    The n-th entry of `EUROSAT_MS_BANDS` is paired with the n-th raster
    band (rasterio band indices start at 1).
    """
    bands = {}
    with rasterio.open(path) as dataset:
        for band_index, band_name in enumerate(EUROSAT_MS_BANDS, start=1):
            bands[band_name] = dataset.read(band_index)
    return bands
|
||
|
||
def test_eurosat_integration(eurosat_ms_root, encoded_eurosat_ms_path):
    """
    Check that every band of every EuroSAT MS source TIFF can be found
    unchanged in the LMDB written by the `rico-hdl` encoder.

    `eurosat_ms_root` is the directory with the source TIFFs and
    `encoded_eurosat_ms_path` is the directory of the encoded LMDB.
    """
    source_file_data = {
        file: read_all_eurosat_ms_bands(file)
        for file in eurosat_ms_root.glob("**/*.tif")
    }
    # Guard against a silently empty dataset directory, which would make
    # the comparison loop below pass without checking anything.
    assert len(source_file_data) > 0

    # Open the environment as a context manager so that it is closed again,
    # even if decoding one of the values raises.
    with lmdb.open(str(encoded_eurosat_ms_path), readonly=True) as env:
        with env.begin(write=False) as txn:
            # NOTE(review): `load` is presumably the safetensors loader
            # imported at the top of the file -- confirm.
            decoded_lmdb_data = {
                k.decode("utf-8"): load(v) for (k, v) in txn.cursor()
            }

    # Every LMDB value decodes to its own dictionary of arrays; below these
    # dictionaries are indexed with the band names from `EUROSAT_MS_BANDS`.
    decoded_dicts = list(decoded_lmdb_data.values())

    # Simply check that the data remains identical, as this is the only
    # _true_ thing I care about from the Python viewpoint.
    # For every source file and every band of that file, some decoded
    # dictionary has to contain an identical array under the same band name.
    for source_file, source_data_dict in source_file_data.items():
        for source_key, source_data in source_data_dict.items():
            assert any(
                np.array_equal(source_data, decoded_dict[source_key])
                for decoded_dict in decoded_dicts
            ), f"Couldn't find data in the LMDB database that matches the data from: {source_file}:{source_key}"
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
|
Oops, something went wrong.