Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add consolidated metadata support #55

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions zarrs/src/group.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

use derive_more::Display;
use thiserror::Error;
use zarrs_metadata::v3::group::ConsolidatedMetadata;

use crate::{
config::{
Expand Down Expand Up @@ -150,6 +151,30 @@
(GM::V2(metadata), V::V3) => GM::V3(group_metadata_v2_to_v3(&metadata)),
}
}

/// Get the consolidated metadata. Returns [`None`] if `consolidated_metadata` is absent.
///
/// Consolidated metadata is not currently supported for Zarr V2 groups, so [`None`] is returned
/// whenever the group metadata is not Zarr V3.
#[must_use]
pub fn consolidated_metadata(&self) -> Option<&ConsolidatedMetadata> {
    if let GroupMetadata::V3(group_metadata) = &self.metadata {
        // Borrow the optional field rather than cloning it.
        group_metadata.consolidated_metadata.as_ref()
    } else {
        // Non-V3 group metadata carries no consolidated metadata.
        None
    }
}

/// Set the consolidated metadata.
///
/// Passing [`None`] clears any consolidated metadata held by a Zarr V3 group.
///
/// Consolidated metadata is not currently supported for Zarr V2 groups, and this function is a no-op.
pub fn set_consolidated_metadata(
    &mut self,
    consolidated_metadata: Option<ConsolidatedMetadata>,
) {
    // Only V3 group metadata is updated; any other metadata is left untouched.
    if let GroupMetadata::V3(v3_metadata) = &mut self.metadata {
        v3_metadata.consolidated_metadata = consolidated_metadata;
    }
}
}

impl<TStorage: ?Sized + ReadableStorageTraits> Group<TStorage> {
Expand Down
40 changes: 39 additions & 1 deletion zarrs/src/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,9 @@
mod node_async;
#[cfg(feature = "async")]
pub use node_async::{async_get_child_nodes, async_node_exists, async_node_exists_listable};
use zarrs_metadata::v3::group::ConsolidatedMetadataMetadata;

use std::sync::Arc;
use std::{collections::HashMap, sync::Arc};

pub use crate::metadata::NodeMetadata;
use thiserror::Error;
Expand Down Expand Up @@ -389,6 +390,43 @@
update_tree(&mut string, &self.children, 1);
string
}

/// Consolidate metadata. Returns [`None`] for an array.
///
/// [`ConsolidatedMetadataMetadata`] can be converted into [`ConsolidatedMetadata`](crate::metadata::v3::group::ConsolidatedMetadata) in [`GroupMetadataV3`](crate::metadata::v3::group::GroupMetadataV3).
#[must_use]
#[allow(clippy::items_after_statements)]
pub fn consolidate_metadata(&self) -> Option<ConsolidatedMetadataMetadata> {
if let NodeMetadata::Array(_) = self.metadata {
// Arrays cannot have consolidated metadata
return None;

Check warning on line 402 in zarrs/src/node.rs

View check run for this annotation

Codecov / codecov/patch

zarrs/src/node.rs#L402

Added line #L402 was not covered by tests
}

fn update_consolidated_metadata(
node_path: &str,
consolidated_metadata: &mut ConsolidatedMetadataMetadata,
children: &[Node],
) {
for child in children {
let relative_path = child
.path()
.as_str()
.strip_prefix(node_path)
.expect("child path should always include the node path");
let relative_path = relative_path.strip_prefix('/').unwrap_or(relative_path);
let relative_path = relative_path.to_string();
consolidated_metadata.insert(relative_path, child.metadata.clone());
update_consolidated_metadata(node_path, consolidated_metadata, &child.children);
}
}
let mut consolidated_metadata = HashMap::default();
update_consolidated_metadata(
self.path().as_str(),
&mut consolidated_metadata,
&self.children,
);
Some(consolidated_metadata)
}
}

#[cfg(test)]
Expand Down
45 changes: 43 additions & 2 deletions zarrs/tests/hierarchy.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
#![cfg(feature = "filesystem")]

use std::sync::Arc;

use zarrs::node::Node;
use zarrs_filesystem::FilesystemStore;
use zarrs::{
filesystem::FilesystemStore, group::Group, metadata::v3::group::ConsolidatedMetadata,
node::Node,
};

#[test]
fn hierarchy_tree() {
Expand All @@ -23,3 +27,40 @@ fn hierarchy_tree() {
"
);
}

/// Consolidating from the root and from a subgroup must reproduce the metadata of each
/// descendant node, and a group must accept the consolidated result.
#[test]
fn consolidated_metadata() {
    let filesystem_store = FilesystemStore::new("./tests/data/hierarchy.zarr")
        .unwrap()
        .sorted();
    let store = Arc::new(filesystem_store);

    // Consolidate from the root: keys are relative to "/".
    let root = Node::open(&store, "/").unwrap();
    let root_consolidated = root.consolidate_metadata().unwrap();
    println!("{:#?}", root_consolidated);

    for relative_path in ["a", "a/baz", "a/foo", "b"] {
        let from_consolidated = root_consolidated.get(relative_path).unwrap();
        let absolute_path = format!("/{}", relative_path);
        let opened = Node::open(&store, &absolute_path).unwrap();
        assert_eq!(from_consolidated, opened.metadata());
    }

    // The group starts without consolidated metadata and holds it once set.
    let mut root_group = Group::open(store.clone(), "/").unwrap();
    assert!(root_group.consolidated_metadata().is_none());
    let consolidated = ConsolidatedMetadata {
        metadata: root_consolidated,
        ..Default::default()
    };
    root_group.set_consolidated_metadata(Some(consolidated));
    assert!(root_group.consolidated_metadata().is_some());

    // Consolidate from a subgroup: keys are relative to "/a".
    let subgroup = Node::open(&store, "/a").unwrap();
    let subgroup_consolidated = subgroup.consolidate_metadata().unwrap();
    println!("{:#?}", subgroup_consolidated);

    for relative_path in ["baz", "foo"] {
        let from_consolidated = subgroup_consolidated.get(relative_path).unwrap();
        let absolute_path = format!("/a/{}", relative_path);
        let opened = Node::open(&store, &absolute_path).unwrap();
        assert_eq!(from_consolidated, opened.metadata());
    }
}
105 changes: 104 additions & 1 deletion zarrs_metadata/src/v3/group.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
use std::collections::HashMap;

use derive_more::Display;
use serde::{Deserialize, Serialize};

use crate::NodeMetadata;

use super::AdditionalFields;

/// Zarr group metadata (storage specification v3).
Expand All @@ -18,7 +22,7 @@ use super::AdditionalFields;
/// }
/// }
#[non_exhaustive]
#[derive(Serialize, Deserialize, Clone, Eq, PartialEq, Debug, Display)]
#[derive(Serialize, Deserialize, Clone, Debug, Display)]
#[display("{}", serde_json::to_string(self).unwrap_or_default())]
pub struct GroupMetadataV3 {
/// An integer defining the version of the storage specification to which the group adheres. Must be `3`.
Expand All @@ -28,11 +32,24 @@ pub struct GroupMetadataV3 {
/// Optional user metadata.
#[serde(default, skip_serializing_if = "serde_json::Map::is_empty")]
pub attributes: serde_json::Map<String, serde_json::Value>,
/// Consolidated metadata.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub consolidated_metadata: Option<ConsolidatedMetadata>,
/// Additional fields.
#[serde(flatten)]
pub additional_fields: AdditionalFields,
}

impl std::cmp::PartialEq for GroupMetadataV3 {
    /// Compares the `attributes` and `additional_fields` of two group metadata values.
    ///
    /// No other field takes part in the comparison.
    fn eq(&self, other: &Self) -> bool {
        if self.attributes != other.attributes {
            return false;
        }
        // NOTE(review): `consolidated_metadata` is currently excluded from equality.
        // Confirm whether that is intended before adding it to this comparison.
        self.additional_fields == other.additional_fields
    }
}

impl Eq for GroupMetadataV3 {}

impl Default for GroupMetadataV3 {
fn default() -> Self {
Self::new(serde_json::Map::new(), AdditionalFields::default())
Expand All @@ -51,6 +68,92 @@ impl GroupMetadataV3 {
node_type: monostate::MustBe!("group"),
attributes,
additional_fields,
consolidated_metadata: None,
}
}
}

/// Consolidated metadata of a Zarr hierarchy.
///
/// This is the type of the optional `consolidated_metadata` field of [`GroupMetadataV3`].
/// Its `Display` output is the JSON serialisation of the value, or an empty string if serialisation fails.
#[derive(Serialize, Deserialize, Clone, PartialEq, Debug, Display)]
#[display("{}", serde_json::to_string(self).unwrap_or_default())]
pub struct ConsolidatedMetadata {
/// A mapping from node path to Group or Array [`NodeMetadata`] object.
pub metadata: ConsolidatedMetadataMetadata,
/// The kind of the consolidated metadata. Must be `'inline'`. Reserved for future use.
pub kind: ConsolidatedMetadataKind,
/// The boolean literal `false`. Indicates that the field is not required to load the Zarr hierarchy.
pub must_understand: monostate::MustBe!(false),
}

/// The `metadata` field of `consolidated_metadata` in [`GroupMetadataV3`].
///
/// Keys are node paths and values are the [`NodeMetadata`] of the node at that path.
pub type ConsolidatedMetadataMetadata = HashMap<String, NodeMetadata>;

impl Default for ConsolidatedMetadata {
    /// Creates consolidated metadata with no entries, of kind
    /// [`ConsolidatedMetadataKind::Inline`] and with `must_understand` set to `false`.
    fn default() -> Self {
        let metadata = HashMap::new();
        let kind = ConsolidatedMetadataKind::Inline;
        let must_understand = monostate::MustBe!(false);
        Self {
            metadata,
            kind,
            must_understand,
        }
    }
}

/// The "kind" of consolidated metadata.
///
/// `Inline` is the only variant defined here; the enum is `#[non_exhaustive]`.
#[non_exhaustive]
#[derive(Serialize, Deserialize, Clone, Eq, PartialEq, Debug, Display)]
pub enum ConsolidatedMetadataKind {
/// Indicates that consolidated metadata is stored inline in the root `zarr.json` object.
///
/// Serialised as the string `"inline"`.
#[serde(rename = "inline")]
Inline,
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Group metadata with an inline `consolidated_metadata` field deserialises, and the
    /// consolidated entry equals the same node metadata deserialised on its own.
    #[test]
    fn group_metadata_consolidated() {
        // The JSON literals are kept unindented so their bytes match the original fixtures.
        let group_json = r#"{
"zarr_format": 3,
"node_type": "group",
"attributes": {
"spam": "ham",
"eggs": 42
},
"consolidated_metadata": {
"metadata": {
"/subgroup": {
"zarr_format": 3,
"node_type": "group",
"attributes": {
"consolidated": "attributes"
}
}
},
"kind": "inline",
"must_understand": false
}
}"#;
        let subgroup_json = r#"{
"zarr_format": 3,
"node_type": "group",
"attributes": {
"consolidated": "attributes"
}
}"#;

        let group_metadata: GroupMetadataV3 = serde_json::from_str(group_json).unwrap();
        let consolidated = group_metadata.consolidated_metadata.unwrap();
        let subgroup = consolidated.metadata.get("/subgroup").unwrap();
        let expected: NodeMetadata = serde_json::from_str(subgroup_json).unwrap();
        assert_eq!(subgroup, &expected);
    }
}