From e1be8831ffaf872c8d4778a6f3b5a6747e0bf93e Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Sat, 24 Aug 2024 17:21:49 +1000 Subject: [PATCH] Add `consolidated_metadata` serialisation and deserialisation Add `Node::consolidate_metadata()` Add `Group::[set_]consolidated_metadata()` --- zarrs/src/group.rs | 25 ++++++++ zarrs/src/node.rs | 28 ++++++++- zarrs/tests/hierarchy.rs | 33 ++++++++++- zarrs_metadata/src/v3/group.rs | 105 ++++++++++++++++++++++++++++++++- 4 files changed, 187 insertions(+), 4 deletions(-) diff --git a/zarrs/src/group.rs b/zarrs/src/group.rs index 07ab1e11..50eb57c3 100644 --- a/zarrs/src/group.rs +++ b/zarrs/src/group.rs @@ -30,6 +30,7 @@ use std::sync::Arc; use derive_more::Display; use thiserror::Error; +use zarrs_metadata::v3::group::ConsolidatedMetadata; use crate::{ config::{ @@ -150,6 +151,30 @@ impl Group { (GM::V2(metadata), V::V3) => GM::V3(group_metadata_v2_to_v3(&metadata)), } } + + /// Get the consolidated metadata. Returns [`None`] if `consolidated_metadata` is absent. + /// + /// Consolidated metadata is not currently supported for Zarr V2 groups. + #[must_use] + pub fn consolidated_metadata(&self) -> Option<&ConsolidatedMetadata> { + if let GroupMetadata::V3(group_metadata) = &self.metadata { + group_metadata.consolidated_metadata.as_ref() + } else { + None + } + } + + /// Set the consolidated metadata. + /// + /// Consolidated metadata is not currently supported for Zarr V2 groups, and this function is a no-op. + pub fn set_consolidated_metadata( + &mut self, + consolidated_metadata: Option, + ) { + if let GroupMetadata::V3(group_metadata) = &mut self.metadata { + group_metadata.consolidated_metadata = consolidated_metadata; + } + } } impl Group { diff --git a/zarrs/src/node.rs b/zarrs/src/node.rs index 778e493b..d89ecf58 100644 --- a/zarrs/src/node.rs +++ b/zarrs/src/node.rs @@ -24,8 +24,9 @@ pub use key::{ mod node_async; #[cfg(feature = "async")] pub use node_async::{async_get_child_nodes, async_node_exists, async_node_exists_listable}; +use zarrs_metadata::v3::group::ConsolidatedMetadataMetadata; -use std::sync::Arc; +use std::{collections::HashMap, sync::Arc}; pub use crate::metadata::NodeMetadata; use thiserror::Error; @@ -389,6 +390,31 @@ impl Node { update_tree(&mut string, &self.children, 1); string } + + /// Consolidate metadata. Returns [`None`] for an array. + /// + /// [`ConsolidatedMetadataMetadata`] can be converted into [`ConsolidatedMetadata`](crate::metadata::v3::group::ConsolidatedMetadata) in [`GroupMetadataV3`](crate::metadata::v3::group::GroupMetadataV3). + #[must_use] + #[allow(clippy::items_after_statements)] + pub fn consolidate_metadata(&self) -> Option { + if let NodeMetadata::Array(_) = self.metadata { + // Arrays cannot have consolidated metadata + return None; + } + + fn update_consolidated_metadata( + consolidated_metadata: &mut ConsolidatedMetadataMetadata, + children: &[Node], + ) { + for child in children { + consolidated_metadata.insert(child.path().to_string(), child.metadata.clone()); + update_consolidated_metadata(consolidated_metadata, &child.children); + } + } + let mut consolidated_metadata = HashMap::default(); + update_consolidated_metadata(&mut consolidated_metadata, &self.children); + Some(consolidated_metadata) + } } #[cfg(test)] diff --git a/zarrs/tests/hierarchy.rs b/zarrs/tests/hierarchy.rs index 45bab6aa..22f24abc 100644 --- a/zarrs/tests/hierarchy.rs +++ b/zarrs/tests/hierarchy.rs @@ -1,7 +1,11 @@ +#![cfg(feature = "filesystem")] + use std::sync::Arc; -use zarrs::node::Node; -use zarrs_filesystem::FilesystemStore; +use zarrs::{ + filesystem::FilesystemStore, group::Group, metadata::v3::group::ConsolidatedMetadata, + node::Node, +}; #[test] fn hierarchy_tree() { @@ -23,3 +27,28 @@ fn hierarchy_tree() { " ); } + +#[test] +fn consolidated_metadata() { + let store = Arc::new( + FilesystemStore::new("./tests/data/hierarchy.zarr") + .unwrap() + .sorted(), + ); + let node = Node::open(&store, "/").unwrap(); + let consolidated_metadata = node.consolidate_metadata().unwrap(); + + for node_path in ["/a/baz", "/a/foo", "/b"] { + let consolidated = consolidated_metadata.get(node_path).unwrap(); + let actual = Node::open(&store, node_path).unwrap(); + assert_eq!(consolidated, actual.metadata()); + } + + let mut group = Group::open(store, "/").unwrap(); + assert!(group.consolidated_metadata().is_none()); + group.set_consolidated_metadata(Some(ConsolidatedMetadata { + metadata: consolidated_metadata, + ..Default::default() + })); + assert!(group.consolidated_metadata().is_some()); +} diff --git a/zarrs_metadata/src/v3/group.rs b/zarrs_metadata/src/v3/group.rs index 2375aafd..6dfb674c 100644 --- a/zarrs_metadata/src/v3/group.rs +++ b/zarrs_metadata/src/v3/group.rs @@ -1,6 +1,10 @@ +use std::collections::HashMap; + use derive_more::Display; use serde::{Deserialize, Serialize}; +use crate::NodeMetadata; + use super::AdditionalFields; /// Zarr group metadata (storage specification v3). @@ -18,7 +22,7 @@ use super::AdditionalFields; /// } /// } #[non_exhaustive] -#[derive(Serialize, Deserialize, Clone, Eq, PartialEq, Debug, Display)] +#[derive(Serialize, Deserialize, Clone, Debug, Display)] #[display("{}", serde_json::to_string(self).unwrap_or_default())] pub struct GroupMetadataV3 { /// An integer defining the version of the storage specification to which the group adheres. Must be `3`. @@ -28,11 +32,24 @@ pub struct GroupMetadataV3 { /// Optional user metadata. #[serde(default, skip_serializing_if = "serde_json::Map::is_empty")] pub attributes: serde_json::Map, + /// Consolidated metadata. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub consolidated_metadata: Option, /// Additional fields. #[serde(flatten)] pub additional_fields: AdditionalFields, } +impl std::cmp::PartialEq for GroupMetadataV3 { + fn eq(&self, other: &Self) -> bool { + self.attributes == other.attributes + // && self.consolidated_metadata == other.consolidated_metadata + && self.additional_fields == other.additional_fields + } +} + +impl Eq for GroupMetadataV3 {} + impl Default for GroupMetadataV3 { fn default() -> Self { Self::new(serde_json::Map::new(), AdditionalFields::default()) @@ -51,6 +68,92 @@ impl GroupMetadataV3 { node_type: monostate::MustBe!("group"), attributes, additional_fields, + consolidated_metadata: None, } } } + +/// Consolidated metadata of a Zarr hierarchy. +#[derive(Serialize, Deserialize, Clone, PartialEq, Debug, Display)] +#[display("{}", serde_json::to_string(self).unwrap_or_default())] +pub struct ConsolidatedMetadata { + /// A mapping from node path to Group or Array [`NodeMetadata`] object. + pub metadata: ConsolidatedMetadataMetadata, + /// The kind of the consolidated metadata. Must be `'inline'`. Reserved for future use. + pub kind: ConsolidatedMetadataKind, + /// The boolean literal `false`. Indicates that the field is not required to load the Zarr hierarchy. + pub must_understand: monostate::MustBe!(false), +} + +/// The `metadata` field of `consolidated_metadata` in [`GroupMetadataV3`]. +pub type ConsolidatedMetadataMetadata = HashMap; + +impl Default for ConsolidatedMetadata { + fn default() -> Self { + Self { + metadata: HashMap::default(), + kind: ConsolidatedMetadataKind::Inline, + must_understand: monostate::MustBe!(false), + } + } +} + +/// The "kind" of consolidated metadata. +#[non_exhaustive] +#[derive(Serialize, Deserialize, Clone, Eq, PartialEq, Debug, Display)] +pub enum ConsolidatedMetadataKind { + /// Indicates that consolidated metadata is stored inline in the root `zarr.json` object. + #[serde(rename = "inline")] + Inline, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn group_metadata_consolidated() { + let group_metadata = serde_json::from_str::( + r#"{ + "zarr_format": 3, + "node_type": "group", + "attributes": { + "spam": "ham", + "eggs": 42 + }, + "consolidated_metadata": { + "metadata": { + "/subgroup": { + "zarr_format": 3, + "node_type": "group", + "attributes": { + "consolidated": "attributes" + } + } + }, + "kind": "inline", + "must_understand": false + } + }"#, + ) + .unwrap(); + assert_eq!( + group_metadata + .consolidated_metadata + .unwrap() + .metadata + .get("/subgroup") + .unwrap(), + &serde_json::from_str::( + r#"{ + "zarr_format": 3, + "node_type": "group", + "attributes": { + "consolidated": "attributes" + } + }"# + ) + .unwrap() + ); + } +}