From 7c24ae32f567818c86f528e2b93f6513c1bdc1ca Mon Sep 17 00:00:00 2001 From: Kai Norman Clasen Date: Wed, 10 Jul 2024 09:51:48 +0200 Subject: [PATCH] feat: add ssl4eo-s12 support --- README.md | 226 +++++++++++++++++- flake.nix | 10 + integration_tests/test_python_integration.py | 203 +++++++++++++++- .../VH.tif | Bin .../VV.tif | Bin .../metadata.json | 0 .../VH.tif | Bin .../VV.tif | Bin .../metadata.json | 0 .../B1.tif | Bin .../B11.tif | Bin .../B12.tif | Bin .../B2.tif | Bin .../B3.tif | Bin .../B4.tif | Bin .../B5.tif | Bin .../B6.tif | Bin .../B7.tif | Bin .../B8.tif | Bin .../B8A.tif | Bin .../B9.tif | Bin .../metadata.json | 0 .../B1.tif | Bin .../B11.tif | Bin .../B12.tif | Bin .../B2.tif | Bin .../B3.tif | Bin .../B4.tif | Bin .../B5.tif | Bin .../B6.tif | Bin .../B7.tif | Bin .../B8.tif | Bin .../B8A.tif | Bin .../B9.tif | Bin .../metadata.json | 0 .../B1.tif | Bin .../B10.tif | Bin .../B11.tif | Bin .../B12.tif | Bin .../B2.tif | Bin .../B3.tif | Bin .../B4.tif | Bin .../B5.tif | Bin .../B6.tif | Bin .../B7.tif | Bin .../B8.tif | Bin .../B8A.tif | Bin .../B9.tif | Bin .../metadata.json | 0 .../B1.tif | Bin .../B10.tif | Bin .../B11.tif | Bin .../B12.tif | Bin .../B2.tif | Bin .../B3.tif | Bin .../B4.tif | Bin .../B5.tif | Bin .../B6.tif | Bin .../B7.tif | Bin .../B8.tif | Bin .../B8A.tif | Bin .../B9.tif | Bin .../metadata.json | 0 rico_hdl/rico_hdl.py | 212 ++++++++++++++-- 64 files changed, 612 insertions(+), 39 deletions(-) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s1/0000200/S1A_IW_GRDH_1SDV_20200607T010800_20200607T010825_032904_03CFBA_D457/VH.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s1/0000200/S1A_IW_GRDH_1SDV_20200607T010800_20200607T010825_032904_03CFBA_D457/VV.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s1/0000200/S1A_IW_GRDH_1SDV_20200607T010800_20200607T010825_032904_03CFBA_D457/metadata.json (100%) rename integration_tests/tiffs/{SSL4EO => 
SSL4EO-S12}/s1/0000200/S1A_IW_GRDH_1SDV_20200903T131212_20200903T131237_034195_03F8F5_AC1C/VH.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s1/0000200/S1A_IW_GRDH_1SDV_20200903T131212_20200903T131237_034195_03F8F5_AC1C/VV.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s1/0000200/S1A_IW_GRDH_1SDV_20200903T131212_20200903T131237_034195_03F8F5_AC1C/metadata.json (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B1.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B11.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B12.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B2.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B3.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B4.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B5.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B6.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B7.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B8.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B8A.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B9.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2a/0000200/20200604T054639_20200604T054831_T43RCP/metadata.json (100%) rename 
integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B1.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B11.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B12.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B2.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B3.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B4.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B5.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B6.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B7.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B8.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B8A.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B9.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2a/0000200/20200813T054639_20200813T054952_T43RCP/metadata.json (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B1.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B10.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B11.tif (100%) rename integration_tests/tiffs/{SSL4EO => 
SSL4EO-S12}/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B12.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B2.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B3.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B4.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B5.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B6.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B7.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B8.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B8A.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B9.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2c/0000200/20200604T054639_20200604T054831_T43RCP/metadata.json (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B1.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B10.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B11.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B12.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B2.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B3.tif (100%) rename 
integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B4.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B5.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B6.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B7.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B8.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B8A.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B9.tif (100%) rename integration_tests/tiffs/{SSL4EO => SSL4EO-S12}/s2c/0000200/20200823T054639_20200823T055618_T43RCP/metadata.json (100%) diff --git a/README.md b/README.md index 025a8b8..b8859f2 100644 --- a/README.md +++ b/README.md @@ -48,6 +48,7 @@ Currently, `rico-hdl` supports: - [HySpecNet-11k][hyspecnet] - [UC Merced Land Use][ucmerced] - [EuroSAT][euro] +- [SSL4EO-S12][ssl4eo-s12] Additional datasets will be added in the near future. @@ -105,18 +106,18 @@ where the dictionary's key is the band name (`B01`, `B12`, `VV`, ...). 
}, 'S2A_MSIL2A_20170613T101031_N9999_R022_T33UUP': { - 'B01': <120x120 uint16 safetensors image data>, + 'B01': <30x30 uint16 safetensors image data>, 'B02': <120x120 uint16 safetensors image data>, 'B03': <120x120 uint16 safetensors image data>, 'B04': <120x120 uint16 safetensors image data>, - 'B05': <120x120 uint16 safetensors image data>, - 'B06': <120x120 uint16 safetensors image data>, - 'B07': <120x120 uint16 safetensors image data>, + 'B05': <60x60 uint16 safetensors image data>, + 'B06': <60x60 uint16 safetensors image data>, + 'B07': <60x60 uint16 safetensors image data>, 'B08': <120x120 uint16 safetensors image data>, - 'B8A': <120x120 uint16 safetensors image data>, - 'B09': <120x120 uint16 safetensors image data>, - 'B11': <120x120 uint16 safetensors image data>, - 'B12': <120x120 uint16 safetensors image data>, + 'B8A': <60x60 uint16 safetensors image data>, + 'B09': <30x30 uint16 safetensors image data>, + 'B11': <60x60 uint16 safetensors image data>, + 'B12': <60x60 uint16 safetensors image data>, } ``` @@ -477,6 +478,214 @@ tensor = np.stack([safetensor_dict[key] for key in [ assert tensor.shape == (13, 64, 64) ``` +## [SSL4EO-S12][ssl4eo-s12] Example + +First, [download the rico-hdl](#Download) binary and install +the Python [lmdb][pyl] and [safetensors][pys] packages. +Then, to convert the Sentinel-1, Sentinel-2 L1C, and Sentinel-2 L2A +patches from the [SSL4EO-S12][ssl4eo-s12] +dataset into the optimized format, call the application with: + +```bash +rico-hdl ssl4eo-s12 --s1-dir <S1_ROOT_DIR> --s2-l1c-dir <S2_L1C_ROOT_DIR> --s2-l2a-dir <S2_L2A_ROOT_DIR> --target-dir Encoded-SSL4EO-S12 +``` + +In [SSL4EO-S12][ssl4eo-s12], each band is stored as a separate file with the associated band as a name (`B1.tif`, `B9.tif`, `B10.tif`, `VV.tif`, ...). +The encoder groups all image files with the same name/prefix and stores the data as a [safetensors][s] dictionary, +where the dictionary's key is the band name (`B1`, `B9`, `B10`, `VV`, ...). + +
+ Example Input + +``` + +├── s1 +│ └── 0000200 +│ ├── S1A_IW_GRDH_1SDV_20200607T010800_20200607T010825_032904_03CFBA_D457 +│ │ ├── metadata.json +│ │ ├── VH.tif +│ │ └── VV.tif +│ └── S1A_IW_GRDH_1SDV_20200903T131212_20200903T131237_034195_03F8F5_AC1C +│ ├── metadata.json +│ ├── VH.tif +│ └── VV.tif +├── s2a +│ └── 0000200 +│ ├── 20200604T054639_20200604T054831_T43RCP +│ │ ├── B1.tif +│ │ ├── B2.tif +│ │ ├── B3.tif +│ │ ├── B4.tif +│ │ ├── B5.tif +│ │ ├── B6.tif +│ │ ├── B7.tif +│ │ ├── B8.tif +│ │ ├── B8A.tif +│ │ ├── B9.tif +│ │ ├── B11.tif +│ │ ├── B12.tif +│ │ └── metadata.json +│ └── 20200813T054639_20200813T054952_T43RCP +│ ├── B1.tif +│ ├── B2.tif +│ ├── B3.tif +│ ├── B4.tif +│ ├── B5.tif +│ ├── B6.tif +│ ├── B7.tif +│ ├── B8.tif +│ ├── B8A.tif +│ ├── B9.tif +│ ├── B11.tif +│ ├── B12.tif +│ └── metadata.json +└── s2c + └── 0000200 + ├── 20200604T054639_20200604T054831_T43RCP + │ ├── B1.tif + │ ├── B2.tif + │ ├── B3.tif + │ ├── B4.tif + │ ├── B5.tif + │ ├── B6.tif + │ ├── B7.tif + │ ├── B8.tif + │ ├── B8A.tif + │ ├── B9.tif + │ ├── B10.tif + │ ├── B11.tif + │ ├── B12.tif + │ └── metadata.json + └── 20200823T054639_20200823T055618_T43RCP + ├── B1.tif + ├── B2.tif + ├── B3.tif + ├── B4.tif + ├── B5.tif + ├── B6.tif + ├── B7.tif + ├── B8.tif + ├── B8A.tif + ├── B9.tif + ├── B10.tif + ├── B11.tif + ├── B12.tif + └── metadata.json +``` + +
+ +
+ LMDB Result + +> [!NOTE] +> We merge the patch directory with the two upper parent directories. +> This path merging ensures that keys are unique and that the entire +> SSL4EO-S12 dataset can be stored in a single LMDB database. + +> [!IMPORTANT] +> The authors of SSL4EO-S12 did not ensure that the resulting patches have +> a consistent size! There are some patches that have an additional row/column +> of pixel values. + +``` +'s1_0000200_S1A_IW_GRDH_1SDV_20200607T010800_20200607T010825_032904_03CFBA_D457': + { + 'VH': <264x264 float32 safetensors image data> + 'VV': <264x264 float32 safetensors image data> + }, +'s1_0000200_S1A_IW_GRDH_1SDV_20200903T131212_20200903T131237_034195_03F8F5_AC1C': + { + 'VH': <264x264 float32 safetensors image data> + 'VV': <264x264 float32 safetensors image data> + }, +'s2a_0000200_20200604T054639_20200604T054831_T43RCP': { + 'B1': <44x44 uint16 safetensors image data> + 'B2': <264x264 uint16 safetensors image data> + 'B3': <264x264 uint16 safetensors image data> + 'B4': <264x264 uint16 safetensors image data> + 'B5': <132x132 uint16 safetensors image data> + 'B6': <132x132 uint16 safetensors image data> + 'B7': <132x132 uint16 safetensors image data> + 'B8': <264x264 uint16 safetensors image data> + 'B8A': <132x132 uint16 safetensors image data> + 'B9': <44x44 uint16 safetensors image data> + 'B11': <132x132 uint16 safetensors image data> + 'B12': <132x132 uint16 safetensors image data> + }, +'s2a_0000200_20200813T054639_20200813T054952_T43RCP': { + 'B1': <44x44 uint16 safetensors image data> + 'B2': <264x264 uint16 safetensors image data> + 'B3': <264x264 uint16 safetensors image data> + 'B4': <264x264 uint16 safetensors image data> + 'B5': <132x132 uint16 safetensors image data> + 'B6': <132x132 uint16 safetensors image data> + 'B7': <132x132 uint16 safetensors image data> + 'B8': <264x264 uint16 safetensors image data> + 'B8A': <132x132 uint16 safetensors image data> + 'B9': 
<44x44 uint16 safetensors image data> + 'B11': <132x132 uint16 safetensors image data> + 'B12': <132x132 uint16 safetensors image data> + }, +'s2c_0000200_20200604T054639_20200604T054831_T43RCP': { + 'B1': <44x44 uint16 safetensors image data> + 'B2': <264x264 uint16 safetensors image data> + 'B3': <264x264 uint16 safetensors image data> + 'B4': <264x264 uint16 safetensors image data> + 'B5': <132x132 uint16 safetensors image data> + 'B6': <132x132 uint16 safetensors image data> + 'B7': <132x132 uint16 safetensors image data> + 'B8': <264x264 uint16 safetensors image data> + 'B8A': <132x132 uint16 safetensors image data> + 'B9': <44x44 uint16 safetensors image data> + 'B10': <44x44 uint16 safetensors image data> + 'B11': <132x132 uint16 safetensors image data> + 'B12': <132x132 uint16 safetensors image data> + }, +'s2c_0000200_20200823T054639_20200823T055618_T43RCP': { + 'B1': <44x44 uint16 safetensors image data> + 'B2': <264x264 uint16 safetensors image data> + 'B3': <264x264 uint16 safetensors image data> + 'B4': <264x264 uint16 safetensors image data> + 'B5': <132x132 uint16 safetensors image data> + 'B6': <132x132 uint16 safetensors image data> + 'B7': <132x132 uint16 safetensors image data> + 'B8': <264x264 uint16 safetensors image data> + 'B8A': <132x132 uint16 safetensors image data> + 'B9': <44x44 uint16 safetensors image data> + 'B10': <44x44 uint16 safetensors image data> + 'B11': <132x132 uint16 safetensors image data> + 'B12': <132x132 uint16 safetensors image data> + }, +``` + +
+ +The following code shows how to access the converted database: + +```python +import lmdb +# import desired deep-learning library: +# numpy, torch, tensorflow, paddle, flax, mlx +from safetensors.numpy import load +from pathlib import Path + +# path to the encoded dataset/output of rico-hdl +encoded_path = Path("./Encoded-SSL4EO-S12") + +# Make sure to only open the environment once +# and not everytime an item is accessed. +env = lmdb.open(str(encoded_path), readonly=True) + +with env.begin() as txn: + # string encoding is required to map the string to an LMDB key + safetensor_dict = load(txn.get("s2c_0000200_20200823T054639_20200823T055618_T43RCP".encode())) + +rgb_bands = ["B4", "B3", "B2"] +rgb_tensor = np.stack([safetensor_dict[b] for b in rgb_bands]) +assert rgb_tensor.shape == (3, 264, 264) +``` + ## Design @@ -540,3 +749,4 @@ If you use this work, please cite: [pys]: https://github.com/huggingface/safetensors [ucmerced]: http://weegee.vision.ucmerced.edu/datasets/landuse.html [euro]: https://zenodo.org/records/7711810 +[ssl4eo-s12]: https://github.com/zhu-xlab/SSL4EO-S12 diff --git a/flake.nix b/flake.nix index 8beb7d7..c7643d0 100644 --- a/flake.nix +++ b/flake.nix @@ -59,10 +59,17 @@ --prefix PATH : ${pkgs.lib.makeBinPath [pkgs.fd]} ''; meta.mainProgram = "rico-hdl"; + # The SSL4EO-S12 base folder is copied instead of the individual base directories + # as otherwise the directory would be prefixed with the hash of the directory + # and would result in an unpredictable LMDB key name, as the base directory name + # is used for the test. 
checkPhase = '' export PATH="$out/bin:$PATH" export RICO_HDL_S1_PATH=${./integration_tests/tiffs/BigEarthNet/BigEarthNet-S1} export RICO_HDL_S2_PATH=${./integration_tests/tiffs/BigEarthNet/BigEarthNet-S2} + export RICO_HDL_SSL4EO_S12_S1_PATH=${./integration_tests/tiffs/SSL4EO-S12}/s1 + export RICO_HDL_SSL4EO_S12_S2_L1C_PATH=${./integration_tests/tiffs/SSL4EO-S12}/s2c + export RICO_HDL_SSL4EO_S12_S2_L2A_PATH=${./integration_tests/tiffs/SSL4EO-S12}/s2a export RICO_HDL_HYSPECNET_PATH=${./integration_tests/tiffs/HySpecNet-11k} export RICO_HDL_LMDB_REF_PATH=${./integration_tests/BigEarthNet_LMDB} export RICO_HDL_UC_MERCED_PATH=${./integration_tests/tiffs/UCMerced_LandUse} @@ -132,6 +139,9 @@ env.RICO_HDL_S2_PATH = "${config.env.DEVENV_ROOT}/integration_tests/tiffs/BigEarthNet/BigEarthNet-S2"; env.RICO_HDL_LMDB_REF_PATH = "${config.env.DEVENV_ROOT}/integration_tests/BigEarthNet_LMDB"; env.JUPYTER_PATH = "${pkgs.python3Packages.jupyterlab}/share/jupyter"; + env.RICO_HDL_SSL4EO_S12_S1_PATH = "${config.env.DEVENV_ROOT}/integration_tests/tiffs/SSL4EO-S12/s1"; + env.RICO_HDL_SSL4EO_S12_S2_L1C_PATH = "${config.env.DEVENV_ROOT}/integration_tests/tiffs/SSL4EO-S12/s2c"; + env.RICO_HDL_SSL4EO_S12_S2_L2A_PATH = "${config.env.DEVENV_ROOT}/integration_tests/tiffs/SSL4EO-S12/s2a"; packages = [ (mkPoetryEnv diff --git a/integration_tests/test_python_integration.py b/integration_tests/test_python_integration.py index d8c0225..9e47784 100644 --- a/integration_tests/test_python_integration.py +++ b/integration_tests/test_python_integration.py @@ -21,7 +21,7 @@ def read_single_band_raster(path): @pytest.fixture(scope="session") -def s1_root() -> Path: +def bigearthnet_s1_root() -> Path: str_p = os.environ.get("RICO_HDL_S1_PATH") or "./tiffs/BigEarthNet/BigEarthNet-S1/" p = Path(str_p) assert p.exists() @@ -29,6 +29,15 @@ def s1_root() -> Path: return p +@pytest.fixture(scope="session") +def bigearthnet_s2_root() -> Path: + str_p = os.environ.get("RICO_HDL_S2_PATH") or 
"./tiffs/BigEarthNet/BigEarthNet-S2/" + p = Path(str_p) + assert p.exists() + assert p.is_dir() + return p + + @pytest.fixture(scope="session") def bigearthnet_lmdb_ref_path() -> Path: str_p = os.environ.get("RICO_HDL_LMDB_REF_PATH") or "./BigEarthNet_LMDB/" @@ -39,8 +48,30 @@ def bigearthnet_lmdb_ref_path() -> Path: @pytest.fixture(scope="session") -def s2_root() -> Path: - str_p = os.environ.get("RICO_HDL_S2_PATH") or "./tiffs/BigEarthNet/BigEarthNet-S2/" +def ssl4eo_s12_s1_root() -> Path: + str_p = os.environ.get("RICO_HDL_SSL4EO_S12_S1_PATH") or "./tiffs/SSL4EO-S12/s1/" + p = Path(str_p) + assert p.exists() + assert p.is_dir() + return p + + +@pytest.fixture(scope="session") +def ssl4eo_s12_s2_l1c_root() -> Path: + str_p = ( + os.environ.get("RICO_HDL_SSL4EO_S12_S2_L1C_PATH") or "./tiffs/SSL4EO-S12/s2c/" + ) + p = Path(str_p) + assert p.exists() + assert p.is_dir() + return p + + +@pytest.fixture(scope="session") +def ssl4eo_s12_s2_l2a_root() -> Path: + str_p = ( + os.environ.get("RICO_HDL_SSL4EO_S12_S2_L2A_PATH") or "./tiffs/SSL4EO-S12/s2a/" + ) p = Path(str_p) assert p.exists() assert p.is_dir() @@ -76,15 +107,35 @@ def eurosat_ms_root() -> Path: # https://docs.pytest.org/en/6.2.x/tmpdir.html#tmpdir-factory-example@pytest.fixture(scope="session") @pytest.fixture -def encoded_bigearthnet_s1_s2_path(s1_root, s2_root, tmpdir_factory) -> Path: +def encoded_bigearthnet_s1_s2_path( + bigearthnet_s1_root, bigearthnet_s2_root, tmpdir_factory +) -> Path: tmp_path = tmpdir_factory.mktemp("lmdb") - # This should make it easier to separately test different versions of the binary and the appimage as well subprocess.run( [ "rico-hdl", "bigearthnet", - f"--bigearthnet-s1-dir={s1_root}", - f"--bigearthnet-s2-dir={s2_root}", + f"--bigearthnet-s1-dir={bigearthnet_s1_root}", + f"--bigearthnet-s2-dir={bigearthnet_s2_root}", + f"--target-dir={tmp_path}", + ], + check=True, + ) + return Path(tmp_path) + + +@pytest.fixture +def encoded_ssl4eo_s12_path( + ssl4eo_s12_s1_root, 
ssl4eo_s12_s2_l1c_root, ssl4eo_s12_s2_l2a_root, tmpdir_factory +) -> Path: + tmp_path = tmpdir_factory.mktemp("lmdb") + subprocess.run( + [ + "rico-hdl", + "ssl4eo-s12", + f"--s1-dir={ssl4eo_s12_s1_root}", + f"--s2-l1c-dir={ssl4eo_s12_s2_l1c_root}", + f"--s2-l2a-dir={ssl4eo_s12_s2_l2a_root}", f"--target-dir={tmp_path}", ], check=True, @@ -138,10 +189,19 @@ def encoded_eurosat_ms_path(eurosat_ms_root, tmpdir_factory) -> Path: def test_reproducibility_and_data_consistency( - s1_root, s2_root, encoded_bigearthnet_s1_s2_path, bigearthnet_lmdb_ref_path + bigearthnet_s1_root, + bigearthnet_s2_root, + encoded_bigearthnet_s1_s2_path, + bigearthnet_lmdb_ref_path, ): - s1_data = {file: read_single_band_raster(file) for file in s1_root.glob("**/*.tif")} - s2_data = {file: read_single_band_raster(file) for file in s2_root.glob("**/*.tif")} + s1_data = { + file: read_single_band_raster(file) + for file in bigearthnet_s1_root.glob("**/*.tif") + } + s2_data = { + file: read_single_band_raster(file) + for file in bigearthnet_s2_root.glob("**/*.tif") + } source_data = {**s1_data, **s2_data} env = lmdb.open(str(encoded_bigearthnet_s1_s2_path), readonly=True) @@ -175,7 +235,9 @@ def test_reproducibility_and_data_consistency( def test_bigearthnet_integration( - s1_root, s2_root, encoded_bigearthnet_s1_s2_path, bigearthnet_lmdb_ref_path + bigearthnet_s1_root, + bigearthnet_s2_root, + encoded_bigearthnet_s1_s2_path, ): env = lmdb.open(str(encoded_bigearthnet_s1_s2_path), readonly=True) @@ -244,6 +306,125 @@ def test_bigearthnet_integration( ) +def test_ssl4eo_s12_integration( + ssl4eo_s12_s1_root, + ssl4eo_s12_s2_l1c_root, + ssl4eo_s12_s2_l2a_root, + encoded_ssl4eo_s12_path, +): + env = lmdb.open(str(encoded_ssl4eo_s12_path), readonly=True) + + with env.begin(write=False) as txn: + cur = txn.cursor() + decoded_lmdb_data = {k.decode("utf-8"): load(v) for (k, v) in cur} + + assert decoded_lmdb_data.keys() == set( + [ + 
"s1_0000200_S1A_IW_GRDH_1SDV_20200607T010800_20200607T010825_032904_03CFBA_D457", + "s1_0000200_S1A_IW_GRDH_1SDV_20200903T131212_20200903T131237_034195_03F8F5_AC1C", + "s2a_0000200_20200604T054639_20200604T054831_T43RCP", + "s2a_0000200_20200813T054639_20200813T054952_T43RCP", + "s2c_0000200_20200604T054639_20200604T054831_T43RCP", + "s2c_0000200_20200823T054639_20200823T055618_T43RCP", + ] + ) + + sample_s1_safetensors_dict = decoded_lmdb_data.get( + "s1_0000200_S1A_IW_GRDH_1SDV_20200607T010800_20200607T010825_032904_03CFBA_D457" + ) + sample_s2_l1c_safetensors_dict = decoded_lmdb_data.get( + "s2c_0000200_20200604T054639_20200604T054831_T43RCP" + ) + sample_s2_l2a_safetensors_dict = decoded_lmdb_data.get( + "s2a_0000200_20200604T054639_20200604T054831_T43RCP" + ) + safetensors_s1_keys = sample_s1_safetensors_dict.keys() + safetensors_s2_l1c_keys = sample_s2_l1c_safetensors_dict.keys() + safetensors_s2_l2a_keys = sample_s2_l2a_safetensors_dict.keys() + assert ( + set( + [ + "B1", + "B2", + "B3", + "B4", + "B5", + "B6", + "B7", + "B8", + "B8A", + "B9", + "B10", + "B11", + "B12", + ] + ) + == safetensors_s2_l1c_keys + ) + assert ( + set( + [ + "B1", + "B2", + "B3", + "B4", + "B5", + "B6", + "B7", + "B8", + "B8A", + "B9", + "B11", + "B12", + ] + ) + == safetensors_s2_l2a_keys + ) + assert ( + set( + [ + "VV", + "VH", + ] + ) + == safetensors_s1_keys + ) + + # IMPORTANT! + # The SSL4EO-S12 authors didn't pay attention to the resulting size of the patches! + # Some have an extra row/column of pixels! + # This assertion does NOT hold over the entire dataset! 
+ assert all(arr.shape == (264, 264) for arr in sample_s1_safetensors_dict.values()) + assert all(arr.dtype == "float32" for arr in sample_s1_safetensors_dict.values()) + + assert all(arr.dtype == "uint16" for arr in sample_s2_l1c_safetensors_dict.values()) + assert all( + sample_s2_l1c_safetensors_dict[key].shape == (264, 264) + for key in ["B2", "B3", "B4", "B8"] + ) + assert all( + sample_s2_l1c_safetensors_dict[key].shape == (132, 132) + for key in ["B5", "B6", "B7", "B8A", "B11", "B12"] + ) + assert all( + sample_s2_l1c_safetensors_dict[key].shape == (44, 44) + for key in ["B1", "B9", "B10"] + ) + + assert all(arr.dtype == "uint16" for arr in sample_s2_l2a_safetensors_dict.values()) + assert all( + sample_s2_l2a_safetensors_dict[key].shape == (264, 264) + for key in ["B2", "B3", "B4", "B8"] + ) + assert all( + sample_s2_l2a_safetensors_dict[key].shape == (132, 132) + for key in ["B5", "B6", "B7", "B8A", "B11", "B12"] + ) + assert all( + sample_s2_l2a_safetensors_dict[key].shape == (44, 44) for key in ["B1", "B9"] + ) + + def test_hyspecnet_integration(hyspecnet_root, encoded_hyspecnet_path): env = lmdb.open(str(encoded_hyspecnet_path), readonly=True) diff --git a/integration_tests/tiffs/SSL4EO/s1/0000200/S1A_IW_GRDH_1SDV_20200607T010800_20200607T010825_032904_03CFBA_D457/VH.tif b/integration_tests/tiffs/SSL4EO-S12/s1/0000200/S1A_IW_GRDH_1SDV_20200607T010800_20200607T010825_032904_03CFBA_D457/VH.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s1/0000200/S1A_IW_GRDH_1SDV_20200607T010800_20200607T010825_032904_03CFBA_D457/VH.tif rename to integration_tests/tiffs/SSL4EO-S12/s1/0000200/S1A_IW_GRDH_1SDV_20200607T010800_20200607T010825_032904_03CFBA_D457/VH.tif diff --git a/integration_tests/tiffs/SSL4EO/s1/0000200/S1A_IW_GRDH_1SDV_20200607T010800_20200607T010825_032904_03CFBA_D457/VV.tif b/integration_tests/tiffs/SSL4EO-S12/s1/0000200/S1A_IW_GRDH_1SDV_20200607T010800_20200607T010825_032904_03CFBA_D457/VV.tif similarity index 100% rename from 
integration_tests/tiffs/SSL4EO/s1/0000200/S1A_IW_GRDH_1SDV_20200607T010800_20200607T010825_032904_03CFBA_D457/VV.tif rename to integration_tests/tiffs/SSL4EO-S12/s1/0000200/S1A_IW_GRDH_1SDV_20200607T010800_20200607T010825_032904_03CFBA_D457/VV.tif diff --git a/integration_tests/tiffs/SSL4EO/s1/0000200/S1A_IW_GRDH_1SDV_20200607T010800_20200607T010825_032904_03CFBA_D457/metadata.json b/integration_tests/tiffs/SSL4EO-S12/s1/0000200/S1A_IW_GRDH_1SDV_20200607T010800_20200607T010825_032904_03CFBA_D457/metadata.json similarity index 100% rename from integration_tests/tiffs/SSL4EO/s1/0000200/S1A_IW_GRDH_1SDV_20200607T010800_20200607T010825_032904_03CFBA_D457/metadata.json rename to integration_tests/tiffs/SSL4EO-S12/s1/0000200/S1A_IW_GRDH_1SDV_20200607T010800_20200607T010825_032904_03CFBA_D457/metadata.json diff --git a/integration_tests/tiffs/SSL4EO/s1/0000200/S1A_IW_GRDH_1SDV_20200903T131212_20200903T131237_034195_03F8F5_AC1C/VH.tif b/integration_tests/tiffs/SSL4EO-S12/s1/0000200/S1A_IW_GRDH_1SDV_20200903T131212_20200903T131237_034195_03F8F5_AC1C/VH.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s1/0000200/S1A_IW_GRDH_1SDV_20200903T131212_20200903T131237_034195_03F8F5_AC1C/VH.tif rename to integration_tests/tiffs/SSL4EO-S12/s1/0000200/S1A_IW_GRDH_1SDV_20200903T131212_20200903T131237_034195_03F8F5_AC1C/VH.tif diff --git a/integration_tests/tiffs/SSL4EO/s1/0000200/S1A_IW_GRDH_1SDV_20200903T131212_20200903T131237_034195_03F8F5_AC1C/VV.tif b/integration_tests/tiffs/SSL4EO-S12/s1/0000200/S1A_IW_GRDH_1SDV_20200903T131212_20200903T131237_034195_03F8F5_AC1C/VV.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s1/0000200/S1A_IW_GRDH_1SDV_20200903T131212_20200903T131237_034195_03F8F5_AC1C/VV.tif rename to integration_tests/tiffs/SSL4EO-S12/s1/0000200/S1A_IW_GRDH_1SDV_20200903T131212_20200903T131237_034195_03F8F5_AC1C/VV.tif diff --git 
a/integration_tests/tiffs/SSL4EO/s1/0000200/S1A_IW_GRDH_1SDV_20200903T131212_20200903T131237_034195_03F8F5_AC1C/metadata.json b/integration_tests/tiffs/SSL4EO-S12/s1/0000200/S1A_IW_GRDH_1SDV_20200903T131212_20200903T131237_034195_03F8F5_AC1C/metadata.json similarity index 100% rename from integration_tests/tiffs/SSL4EO/s1/0000200/S1A_IW_GRDH_1SDV_20200903T131212_20200903T131237_034195_03F8F5_AC1C/metadata.json rename to integration_tests/tiffs/SSL4EO-S12/s1/0000200/S1A_IW_GRDH_1SDV_20200903T131212_20200903T131237_034195_03F8F5_AC1C/metadata.json diff --git a/integration_tests/tiffs/SSL4EO/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B1.tif b/integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B1.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B1.tif rename to integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B1.tif diff --git a/integration_tests/tiffs/SSL4EO/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B11.tif b/integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B11.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B11.tif rename to integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B11.tif diff --git a/integration_tests/tiffs/SSL4EO/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B12.tif b/integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B12.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B12.tif rename to integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B12.tif diff --git a/integration_tests/tiffs/SSL4EO/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B2.tif 
b/integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B2.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B2.tif rename to integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B2.tif diff --git a/integration_tests/tiffs/SSL4EO/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B3.tif b/integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B3.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B3.tif rename to integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B3.tif diff --git a/integration_tests/tiffs/SSL4EO/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B4.tif b/integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B4.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B4.tif rename to integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B4.tif diff --git a/integration_tests/tiffs/SSL4EO/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B5.tif b/integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B5.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B5.tif rename to integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B5.tif diff --git a/integration_tests/tiffs/SSL4EO/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B6.tif b/integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B6.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B6.tif rename to 
integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B6.tif diff --git a/integration_tests/tiffs/SSL4EO/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B7.tif b/integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B7.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B7.tif rename to integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B7.tif diff --git a/integration_tests/tiffs/SSL4EO/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B8.tif b/integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B8.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B8.tif rename to integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B8.tif diff --git a/integration_tests/tiffs/SSL4EO/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B8A.tif b/integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B8A.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B8A.tif rename to integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B8A.tif diff --git a/integration_tests/tiffs/SSL4EO/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B9.tif b/integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B9.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B9.tif rename to integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200604T054639_20200604T054831_T43RCP/B9.tif diff --git a/integration_tests/tiffs/SSL4EO/s2a/0000200/20200604T054639_20200604T054831_T43RCP/metadata.json b/integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200604T054639_20200604T054831_T43RCP/metadata.json 
similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2a/0000200/20200604T054639_20200604T054831_T43RCP/metadata.json rename to integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200604T054639_20200604T054831_T43RCP/metadata.json diff --git a/integration_tests/tiffs/SSL4EO/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B1.tif b/integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B1.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B1.tif rename to integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B1.tif diff --git a/integration_tests/tiffs/SSL4EO/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B11.tif b/integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B11.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B11.tif rename to integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B11.tif diff --git a/integration_tests/tiffs/SSL4EO/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B12.tif b/integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B12.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B12.tif rename to integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B12.tif diff --git a/integration_tests/tiffs/SSL4EO/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B2.tif b/integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B2.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B2.tif rename to integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B2.tif diff --git 
a/integration_tests/tiffs/SSL4EO/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B3.tif b/integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B3.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B3.tif rename to integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B3.tif diff --git a/integration_tests/tiffs/SSL4EO/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B4.tif b/integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B4.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B4.tif rename to integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B4.tif diff --git a/integration_tests/tiffs/SSL4EO/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B5.tif b/integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B5.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B5.tif rename to integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B5.tif diff --git a/integration_tests/tiffs/SSL4EO/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B6.tif b/integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B6.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B6.tif rename to integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B6.tif diff --git a/integration_tests/tiffs/SSL4EO/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B7.tif b/integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B7.tif similarity index 100% rename from 
integration_tests/tiffs/SSL4EO/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B7.tif rename to integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B7.tif diff --git a/integration_tests/tiffs/SSL4EO/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B8.tif b/integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B8.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B8.tif rename to integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B8.tif diff --git a/integration_tests/tiffs/SSL4EO/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B8A.tif b/integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B8A.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B8A.tif rename to integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B8A.tif diff --git a/integration_tests/tiffs/SSL4EO/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B9.tif b/integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B9.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B9.tif rename to integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200813T054639_20200813T054952_T43RCP/B9.tif diff --git a/integration_tests/tiffs/SSL4EO/s2a/0000200/20200813T054639_20200813T054952_T43RCP/metadata.json b/integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200813T054639_20200813T054952_T43RCP/metadata.json similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2a/0000200/20200813T054639_20200813T054952_T43RCP/metadata.json rename to integration_tests/tiffs/SSL4EO-S12/s2a/0000200/20200813T054639_20200813T054952_T43RCP/metadata.json diff --git 
a/integration_tests/tiffs/SSL4EO/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B1.tif b/integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B1.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B1.tif rename to integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B1.tif diff --git a/integration_tests/tiffs/SSL4EO/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B10.tif b/integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B10.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B10.tif rename to integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B10.tif diff --git a/integration_tests/tiffs/SSL4EO/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B11.tif b/integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B11.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B11.tif rename to integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B11.tif diff --git a/integration_tests/tiffs/SSL4EO/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B12.tif b/integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B12.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B12.tif rename to integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B12.tif diff --git a/integration_tests/tiffs/SSL4EO/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B2.tif b/integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B2.tif similarity index 100% rename from 
integration_tests/tiffs/SSL4EO/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B2.tif rename to integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B2.tif diff --git a/integration_tests/tiffs/SSL4EO/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B3.tif b/integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B3.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B3.tif rename to integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B3.tif diff --git a/integration_tests/tiffs/SSL4EO/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B4.tif b/integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B4.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B4.tif rename to integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B4.tif diff --git a/integration_tests/tiffs/SSL4EO/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B5.tif b/integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B5.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B5.tif rename to integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B5.tif diff --git a/integration_tests/tiffs/SSL4EO/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B6.tif b/integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B6.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B6.tif rename to integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B6.tif diff --git a/integration_tests/tiffs/SSL4EO/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B7.tif 
b/integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B7.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B7.tif rename to integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B7.tif diff --git a/integration_tests/tiffs/SSL4EO/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B8.tif b/integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B8.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B8.tif rename to integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B8.tif diff --git a/integration_tests/tiffs/SSL4EO/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B8A.tif b/integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B8A.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B8A.tif rename to integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B8A.tif diff --git a/integration_tests/tiffs/SSL4EO/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B9.tif b/integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B9.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B9.tif rename to integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200604T054639_20200604T054831_T43RCP/B9.tif diff --git a/integration_tests/tiffs/SSL4EO/s2c/0000200/20200604T054639_20200604T054831_T43RCP/metadata.json b/integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200604T054639_20200604T054831_T43RCP/metadata.json similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2c/0000200/20200604T054639_20200604T054831_T43RCP/metadata.json rename to 
integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200604T054639_20200604T054831_T43RCP/metadata.json diff --git a/integration_tests/tiffs/SSL4EO/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B1.tif b/integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B1.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B1.tif rename to integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B1.tif diff --git a/integration_tests/tiffs/SSL4EO/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B10.tif b/integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B10.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B10.tif rename to integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B10.tif diff --git a/integration_tests/tiffs/SSL4EO/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B11.tif b/integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B11.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B11.tif rename to integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B11.tif diff --git a/integration_tests/tiffs/SSL4EO/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B12.tif b/integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B12.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B12.tif rename to integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B12.tif diff --git a/integration_tests/tiffs/SSL4EO/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B2.tif 
b/integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B2.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B2.tif rename to integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B2.tif diff --git a/integration_tests/tiffs/SSL4EO/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B3.tif b/integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B3.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B3.tif rename to integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B3.tif diff --git a/integration_tests/tiffs/SSL4EO/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B4.tif b/integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B4.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B4.tif rename to integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B4.tif diff --git a/integration_tests/tiffs/SSL4EO/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B5.tif b/integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B5.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B5.tif rename to integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B5.tif diff --git a/integration_tests/tiffs/SSL4EO/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B6.tif b/integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B6.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B6.tif rename to 
integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B6.tif diff --git a/integration_tests/tiffs/SSL4EO/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B7.tif b/integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B7.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B7.tif rename to integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B7.tif diff --git a/integration_tests/tiffs/SSL4EO/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B8.tif b/integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B8.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B8.tif rename to integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B8.tif diff --git a/integration_tests/tiffs/SSL4EO/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B8A.tif b/integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B8A.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B8A.tif rename to integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B8A.tif diff --git a/integration_tests/tiffs/SSL4EO/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B9.tif b/integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B9.tif similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B9.tif rename to integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200823T054639_20200823T055618_T43RCP/B9.tif diff --git a/integration_tests/tiffs/SSL4EO/s2c/0000200/20200823T054639_20200823T055618_T43RCP/metadata.json b/integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200823T054639_20200823T055618_T43RCP/metadata.json 
similarity index 100% rename from integration_tests/tiffs/SSL4EO/s2c/0000200/20200823T054639_20200823T055618_T43RCP/metadata.json rename to integration_tests/tiffs/SSL4EO-S12/s2c/0000200/20200823T054639_20200823T055618_T43RCP/metadata.json diff --git a/rico_hdl/rico_hdl.py b/rico_hdl/rico_hdl.py index b998b84..fcb5752 100644 --- a/rico_hdl/rico_hdl.py +++ b/rico_hdl/rico_hdl.py @@ -1,6 +1,6 @@ import os import typer -from typing import TypeAlias +from typing import TypeAlias, Optional from typing_extensions import Annotated import lmdb from safetensors.numpy import save @@ -33,12 +33,46 @@ "B09", ] +SSL4EO_S12_S1_ORDERING = ["VH", "VV"] +SSL4EO_S12_S2_L1C_ORDERING = [ + "B2", + "B3", + "B4", + "B8", + "B5", + "B6", + "B7", + "B8A", + "B10", + "B11", + "B12", + "B1", + "B9", +] + +SSL4EO_S12_S2_L2A_ORDERING = [ + "B2", + "B3", + "B4", + "B8", + "B5", + "B6", + "B7", + "B8A", + "B11", + "B12", + "B1", + "B9", +] + # Defined in the order of the bands! # Order taken from (and only implicitely confirmed in): # https://github.com/phelber/EuroSAT/issues/7#issuecomment-916754970 # Visualizing the individual bands supports the ordering, as one # can see the different interpolation strengths for the 20 and 60m # bands. +# This should be an index-to-band mapping; the ordering within the safetensor +# can then be independent EUROSAT_MS_BANDS = [ "B01", "B02", @@ -110,26 +144,45 @@ def read_single_band_raster(path: Path, index: int = 1, is_georeferenced: bool = return r.read(index) -def s2_read_tif(path: Path): - # could also have the logic to try out .tiff - if not path.exists(): - raise Exception( - f"Could not find file: {path}\nThe S2 dataset is probably incomplete/broken!" 
- ) - return read_single_band_raster(path) +def bigearthnet_s2_to_safetensor(patch_path: str) -> bytes: + """ + Given the path to a BigEarthNet-S2 patch directory + (NOT the individual TIFF files), read the individual + band files in a pre-defined order and convert it + into a serialized safetensor dictionary. + """ + # In Python the dictionary insertion order is stable! + # order the data here to make it clear that we are doing it + # to order the safetensor entries! + p = Path(patch_path) + data = { + band: read_single_band_raster(p.joinpath(f"{p.stem}_{band}.tif")) + for band in BIGEARTHNET_S2_ORDERING + } + return save(data, metadata=None) -def s1_read_tif(path: Path): - if not path.exists(): - raise Exception( - f"Could not find file: {path}\nThe S1 dataset is probably incomplete/broken!" - ) - return read_single_band_raster(path) +def ssl4eo_s1_to_safetensor(patch_path: str) -> bytes: + """ + Given the path to a SSL4EO-S12-S1 patch directory + (NOT the individual TIFF files), read the individual + band files in a pre-defined order and convert it + into a serialized safetensor dictionary. + """ + # In Python the dictionary insertion order is stable! + # order the data here to make it clear that we are doing it + # to order the safetensor entries! + p = Path(patch_path) + data = { + band: read_single_band_raster(p.joinpath(f"{band}.tif")) + for band in SSL4EO_S12_S1_ORDERING + } + return save(data, metadata=None) -def bigearthnet_s2_to_safetensor(patch_path: str) -> bytes: +def ssl4eo_s2_l1c_to_safetensor(patch_path: str) -> bytes: """ - Given the path to a BigEarthNet-S2 patch directory + Given the path to a SSL4EO-S12-S2 L1C patch directory (NOT the individual TIFF files), read the individual band files in a pre-defined order and convert it into a serialized safetensor dictionary. @@ -139,8 +192,26 @@ def bigearthnet_s2_to_safetensor(patch_path: str) -> bytes: # to order the safetensor entries! 
p = Path(patch_path) data = { - band: s2_read_tif(p.joinpath(f"{p.stem}_{band}.tif")) - for band in BIGEARTHNET_S2_ORDERING + band: read_single_band_raster(p.joinpath(f"{band}.tif")) + for band in SSL4EO_S12_S2_L1C_ORDERING + } + return save(data, metadata=None) + + +def ssl4eo_s2_l2a_to_safetensor(patch_path: str) -> bytes: + """ + Given the path to a SSL4EO-S12-S2 L2A patch directory + (NOT the individual TIFF files), read the individual + band files in a pre-defined order and convert it + into a serialized safetensor dictionary. + """ + # In Python the dictionary insertion order is stable! + # order the data here to make it clear that we are doing it + # to order the safetensor entries! + p = Path(patch_path) + data = { + band: read_single_band_raster(p.joinpath(f"{band}.tif")) + for band in SSL4EO_S12_S2_L2A_ORDERING } return save(data, metadata=None) @@ -195,11 +266,12 @@ def uc_merced( lmdb_writer(env, patch_paths, encode_stem, uc_merced_to_safetensor) +# I will only add support for the RGB version if somebody explicitly asks +# for it. I want to encourage users to use the actual tiff data instead. @app.command() def eurosat_multi_spectral( target_dir: TargetDir, dataset_dir: DatasetDir, - # RGB ? ): """ [EuroSAT Multi-Spectral](https://doi.org/10.5281/zenodo.7711810) converter. @@ -265,6 +337,19 @@ def encode_stem(path: str) -> bytes: return str(Path(path).stem).encode() +# yeah, this could be done more generic but it can still be refactored +# if it is really needed in different variations. +def encode_three_levels(path: str, join_char: str = "_") -> bytes: + """ + Given a path that is at least three levels deep, concatenate the names + of the three deepest path components. 
+ + Example: `/home/user/name/patch` -> `user_name_patch` + """ + p = Path(path) + return join_char.join([p.parent.parent.name, p.parent.name, p.name]).encode() + + def eurosat_ms_to_safetensor(patch_path: str) -> bytes: """ Given the path to a multi-spectral EuroSAT patch file (`.tif` file), @@ -323,6 +408,7 @@ def fast_find( search_directory: str, only_dir: bool = True, threads: int = os.cpu_count(), + exact_depth: Optional[int] = None, ) -> list[str]: """ Use `fd` to quickly find all files/directories that match a given regular expression. @@ -341,7 +427,8 @@ def fast_find( "--regex", regex, ] - + (["--type=directory"] if only_dir else []), + + (["--type=directory"] if only_dir else []) + + ([f"--exact-depth={exact_depth}"] if exact_depth is not None else []), text=True, ).splitlines() @@ -397,6 +484,91 @@ def bigearthnet( lmdb_writer(env, s2_patch_paths, encode_stem, bigearthnet_s2_to_safetensor) +@app.command() +def ssl4eo_s12( + target_dir: TargetDir, + s1_dir: DatasetDir = None, + s2_l1c_dir: DatasetDir = None, + s2_l2a_dir: DatasetDir = None, +): + """ + [SSL4EO-S12 Sentinel-1, Sentinel-2 L1C, and Sentinel-2 L2A](https://github.com/zhu-xlab/SSL4EO-S12) converter. + If all source directories are given, they will be written to the same LMDB file. + + The LMDB keys will be the normalized path to the patches + (i.e., no `_BXY.tif` suffix). + An example key for the path: + - `s1/0000200/S1A_IW_GRDH_1SDV_20200607T010800_20200607T010825_032904_03CFBA_D457/` + would be + - `s1_0000200_S1A_IW_GRDH_1SDV_20200607T010800_20200607T010825_032904_03CFBA_D457` + + and for the path: + - `s2a/0000200/20200604T054639_20200604T054831_T43RCP` + would be + - `s2a_0000200_20200604T054639_20200604T054831_T43RCP` + + The `safetensors` keys relate to the associated band (for example: `B1`, `B8A`, `VV`, `B10`), + which depends on the selected sub-dataset. + + NOTE: We recommend downloading the dataset from huggingface, as the download is much more reliable. 
+ To unpack the data simply run `cat s1*.tar.gz | tar -xzf -` + """ + log.debug("Will first collect all files and ensure that some patches are found.") + + if (s1_dir is None) and (s2_l1c_dir is None) and (s2_l2a_dir is None): + log.error("Please provide at least one directory path") + exit(-1, "No source directory is specified") + + if s1_dir is not None: + log.info(f"Searching for patches in: {s1_dir}") + # use fastest matching logic; will fail if directory has been touched or changed + s1_patch_paths = fast_find(".", s1_dir, only_dir=True, exact_depth=2) + num_s1_patch_paths = len(s1_patch_paths) + log.debug(f"Found {num_s1_patch_paths} S1 patches.") + assert num_s1_patch_paths > 0 + + if s2_l1c_dir is not None: + log.info(f"Seaching for patches in: {s2_l1c_dir}") + s2_l1c_patch_paths = fast_find(".", s2_l1c_dir, only_dir=True, exact_depth=2) + # use fastest matching logic; will fail if directory has been touched or changed + num_s2_l1c_patch_paths = len(s2_l1c_patch_paths) + log.debug(f"Found {num_s2_l1c_patch_paths} S2 L1C patches.") + assert num_s2_l1c_patch_paths > 0 + + if s2_l2a_dir is not None: + log.info(f"Seaching for patches in: {s2_l2a_dir}") + s2_l2a_patch_paths = fast_find(".", s2_l2a_dir, only_dir=True, exact_depth=2) + # use fastest matching logic; will fail if directory has been touched or changed + num_s2_l2a_patch_paths = len(s2_l2a_patch_paths) + log.debug(f"Found {num_s2_l2a_patch_paths} S2 L2A patches.") + assert num_s2_l2a_patch_paths > 0 + + # postpone writing until AFTER both dataset files have been assembled. + # Otherwise an error in the latter CLI argument could produce an incomplete LMDB + env = open_lmdb(target_dir) + + # Above we are matching all directories that are two levels deep relative to + # the given base directory. As the s2-l2a and s2-l1c sub-paths are identical + # for a given tile, we need to embed the base directory name `s2c` and `s2a` + # to allow writing a single LMDB file. 
+ # For consistency, we do the same for the S1 data + if s1_dir is not None: + log.debug("Writing SSL4EO-S12-S1 data into LMDB") + lmdb_writer(env, s1_patch_paths, encode_three_levels, ssl4eo_s1_to_safetensor) + + if s2_l1c_dir is not None: + log.debug("Writing SSL4EO-S12-S2 L1C data into LMDB") + lmdb_writer( + env, s2_l1c_patch_paths, encode_three_levels, ssl4eo_s2_l1c_to_safetensor + ) + + if s2_l2a_dir is not None: + log.debug("Writing SSL4EO-S12-S2 L2A data into LMDB") + lmdb_writer( + env, s2_l2a_patch_paths, encode_three_levels, ssl4eo_s2_l2a_to_safetensor + ) + + def lmdb_writer(env, paths, lmdb_key_extractor_func, safetensor_generator): """ A parallel LMDB writer.