Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add rename functionality for stores. #72

Merged
merged 3 commits into from
Sep 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,8 @@ List.map GroupNode.to_path g;;

FilesystemStore.erase_group_node store group_node;;
FilesystemStore.erase_all_nodes store;; (* clears the store *)
FilesystemStore.rename_group store group_node;;
FilesystemStore.rename_array store anode;;
```

[1]: https://codecov.io/gh/zoj613/zarr-ml/graph/badge.svg?token=KOOG2Y1SH5
Expand Down
88 changes: 37 additions & 51 deletions examples/inmemory_zipstore.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@
It supports both read and write operations. This is because the
underlying Zip library used reads all Zip file bytes into memory. All
store updates are done in-memory and thus to update the actual zip file
we must persist the data using `MemoryZipStore.write_to_file`.
we must write the update bytes to disk. The `with_open` convenience
function serves this purpose; it ensures that any updates to the store
are written to the zip file upon exit.

The main requirement is to implement the signature of Zarr.Types.IO.
We use Zarr_lwt Deferred module for `Deferred` so that the store can be
We use Zarr_lwt's Deferred module for `Deferred` so that the store can be
Lwt-aware.

To compile & run this example execute the command
Expand All @@ -14,7 +16,6 @@

module MemoryZipStore : sig
include Zarr.Storage.STORE with type 'a Deferred.t = 'a Lwt.t
(*val create : ?level:Zipc_deflate.level -> string -> t *)
val with_open : ?level:Zipc_deflate.level -> string -> (t -> 'a Deferred.t) -> 'a Deferred.t
end = struct
module M = Map.Make(String)
Expand Down Expand Up @@ -48,11 +49,9 @@ end = struct
| None -> raise (Zarr.Storage.Key_not_found key)
| Some m ->
match Zipc.Member.kind m with
| Zipc.Member.Dir -> failwith "cannot get size of directory."
| Zipc.Member.Dir -> failwith "A chunk key cannot be a directory."
| Zipc.Member.File f ->
match Zipc.File.to_binary_string f with
| Error e -> failwith e
| Ok s -> s
Result.fold ~error:failwith ~ok:Fun.id @@ Zipc.File.to_binary_string f

let get_partial_values t key ranges =
let+ data = get t key in
Expand Down Expand Up @@ -93,11 +92,8 @@ end = struct
match Zipc.Member.(make ~path:key @@ File f) with
| Error e -> failwith e
| Ok m ->
Lwt_mutex.with_lock
t.mutex
(fun () ->
t.ic <- Zipc.add m t.ic;
Deferred.return_unit)
Lwt_mutex.with_lock t.mutex @@ fun () ->
Deferred.return (t.ic <- Zipc.add m t.ic)

let set_partial_values t key ?(append=false) rv =
let f =
Expand All @@ -106,9 +102,9 @@ end = struct
Deferred.return @@ acc ^ v
else
fun acc (rs, v) ->
let s = Bytes.of_string acc in
let s = Bytes.unsafe_of_string acc in
String.(length v |> Bytes.blit_string v 0 s rs);
Deferred.return @@ Bytes.to_string s
Deferred.return @@ Bytes.unsafe_to_string s
in
match Zipc.Member.kind (Option.get @@ Zipc.find key t.ic) with
| Zipc.Member.Dir -> Deferred.return_unit
Expand All @@ -118,52 +114,41 @@ end = struct
| Ok s -> Deferred.fold_left f s rv >>= set t key

let erase t key =
Lwt_mutex.with_lock
t.mutex
(fun () ->
t.ic <- Zipc.remove key t.ic;
Deferred.return_unit)
Lwt_mutex.with_lock t.mutex @@ fun () ->
Deferred.return (t.ic <- Zipc.remove key t.ic)

let erase_prefix t prefix =
let m = Zipc.to_string_map t.ic in
let m' =
M.filter_map
(fun k v ->
if String.starts_with ~prefix k then None else Some v) m in
Lwt_mutex.with_lock
t.mutex
(fun () ->
t.ic <- Zipc.of_string_map m';
Deferred.return_unit)
end
let m' = M.filter_map
(fun k v -> if String.starts_with ~prefix k then None else Some v) m in
Lwt_mutex.with_lock t.mutex @@ fun () ->
Deferred.return (t.ic <- Zipc.of_string_map m')

let rename t ok nk =
Lwt_mutex.with_lock t.mutex @@ fun () ->
let m = Zipc.to_string_map t.ic in
let m1, m2 = M.partition (fun k _ -> String.starts_with ~prefix:ok k) m in
let l = String.length ok in
let s = Seq.map
(fun (k, v) -> nk ^ String.(length k - l |> sub k l), v) @@ M.to_seq m1 in
t.ic <- Zipc.of_string_map @@ M.add_seq s m2; Lwt.return_unit
end
(* this functor generates the public signature of our Zip file store. *)
include Zarr.Storage.Make(Z)

(* now we create functions to open and close the store. *)

(*let create ?(level=`Default) path = Z.{ic = Zipc.empty; level; path} *)

let with_open ?(level=`Default) path f =
let s = In_channel.(with_open_bin path input_all) in
let t = match Zipc.of_binary_string s with
| Ok ic -> Z.{ic; level; path; mutex = Lwt_mutex.create ()}
| Error e -> failwith e
in
Lwt.finalize
(fun () -> f t)
(fun () ->
Lwt_io.with_file
~flags:Unix.[O_WRONLY; O_TRUNC; O_CREAT; O_NONBLOCK]
~mode:Lwt_io.Output
t.path
(fun oc ->
let open Lwt.Syntax in
match Zipc.to_binary_string t.ic with
| Error e -> failwith e
| Ok s' ->
if String.equal s s' then Lwt.return_unit else
let* () = Lwt_io.write oc s' in Lwt_io.flush oc))
Lwt.finalize (fun () -> f t) @@ fun () ->
Lwt_io.with_file
~flags:Unix.[O_WRONLY; O_TRUNC; O_CREAT; O_NONBLOCK]
~mode:Lwt_io.Output
t.path
(fun oc ->
Result.fold ~error:failwith ~ok:(Lwt_io.write oc) @@ Zipc.to_binary_string t.ic)
end

let _ =
Expand All @@ -172,17 +157,18 @@ let _ =
let open Zarr.Indexing in
let open MemoryZipStore.Deferred.Syntax in

let printlist = [%show: string list] in
MemoryZipStore.with_open "examples/data/testdata.zip" @@ fun store ->
let* xs, _ = MemoryZipStore.find_all_nodes store in
print_endline @@ "All array nodes: " ^ printlist (List.map ArrayNode.to_path xs);
let anode = List.hd @@ List.filter
(fun node -> ArrayNode.to_path node = "/some/group/name") xs in
let slice = [|R [|0; 20|]; I 10; R [||]|] in
let* x = MemoryZipStore.read_array store anode slice Zarr.Ndarray.Char in
let x' = x |> Zarr.Ndarray.map @@ fun _ -> Random.int 256 |> Char.chr in
let* () = MemoryZipStore.write_array store anode slice x' in
let+ y = MemoryZipStore.read_array store anode slice Zarr.Ndarray.Char in
assert (Zarr.Ndarray.equal x' y)
let* y = MemoryZipStore.read_array store anode slice Zarr.Ndarray.Char in
assert (Zarr.Ndarray.equal x' y);
let* () = MemoryZipStore.rename_array store anode "name2" in
let+ exists = MemoryZipStore.array_exists store @@ ArrayNode.of_path "/some/group/name2" in
assert exists
end;
print_endline "Zip store has been update."
30 changes: 14 additions & 16 deletions examples/readonly_zipstore.ml
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
(* This module implements a Read-only Zip file zarr store that is Eio-aware.
The main requirement is to implement the signature of Zarr.Types.IO.
We use Zarr_eio's Deferred module for `Deferred` so that the store can be
Eio-aware. Since Zip stores cannot have files updated or removed, we only
implement the get_* and list_* family of functions and raise an
Not_implemented exception for the set_* and erase_* family of functions.
This effectively allows us to create a read-only store since calling any
of the following functions would result in an `Not_implemented` exception:
Eio-aware. We only implement the get_* and list_* family of functions and
raise a Not_implemented exception for the set_* and erase_* family of
functions. This effectively allows us to create a read-only store since
calling any of the following functions would result in error:
- ReadOnlyZipStore.create_group
- ReadOnlyZipStore.create_array
- ReadOnlyZipStore.erase_group_node
- ReadOnlyZipStore.erase_array_node
- ReadOnlyZipStore.erase_all_nodes
- ReadOnlyZipStore.write_array
- ReadOnlyZipStore.reshape
- ReadOnlyZipStore.rename_array
- ReadOnlyZipStore.rename_group
Below we show how to implement this custom Zarr Store.

To compile & run this example execute the command
Expand All @@ -21,11 +22,8 @@

module ReadOnlyZipStore : sig
exception Not_implemented

include Zarr.Storage.STORE with type 'a Deferred.t = 'a
val open_store : string -> t
val close : t -> unit

val with_open : string -> (t -> 'a) -> 'a
end = struct
exception Not_implemented

Expand Down Expand Up @@ -83,25 +81,25 @@ end = struct
let erase _ = raise Not_implemented

let erase_prefix _ = raise Not_implemented

let rename _ = raise Not_implemented
end

(* this functor generates the public signature of our Zip file store. *)
include Zarr.Storage.Make(Z)

(* now we create functions to open and close the store. *)
let open_store path = Zip.open_in path
let close = Zip.close_in
let with_open path f =
let x = Zip.open_in path in
Fun.protect ~finally:(fun () -> Zip.close_in x) @@ fun () -> f x
end

let _ =
Eio_main.run @@ fun _ ->
let open Zarr.Node in

let store = ReadOnlyZipStore.open_store "examples/data/testdata.zip" in
ReadOnlyZipStore.with_open "examples/data/testdata.zip" @@ fun store ->
let xs, _ = ReadOnlyZipStore.find_all_nodes store in
let anode = List.hd @@ Eio.Fiber.List.filter
(fun node -> ArrayNode.to_path node = "/some/group/name") xs in
let arr = ReadOnlyZipStore.read_array store anode [||] Zarr.Ndarray.Char in
try ReadOnlyZipStore.write_array store anode [||] arr with
| ReadOnlyZipStore.Not_implemented -> print_endline "Store is read-only";
ReadOnlyZipStore.close store
| ReadOnlyZipStore.Not_implemented -> print_endline "Store is read-only"
3 changes: 3 additions & 0 deletions zarr-eio/src/storage.ml
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ module FilesystemStore = struct
| p when Eio.Path.is_directory p ->
Either.right @@ (fspath_to_key t p) ^ "/"
| p -> Either.left @@ fspath_to_key t p) (Eio.Path.read_dir dir)

let rename t k k' =
Eio.Path.rename (key_to_fspath t k) (key_to_fspath t k')
end

module U = Zarr.Util
Expand Down
19 changes: 17 additions & 2 deletions zarr-eio/test/test_eio.ml
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ let test_storage
assert_equal exp got)
[[`ShardingIndexed cfg]; [`Bytes BE]];

let child = GroupNode.of_path "/some/child" in
let child = GroupNode.of_path "/some/child/group" in
create_group store child;
let arrays, groups = find_child_nodes store gnode in
assert_equal
Expand All @@ -86,7 +86,22 @@ let test_storage
List.fast_sort String.compare @@
List.map ArrayNode.show ac @ List.map GroupNode.show gc in
assert_equal
~printer:string_of_list ["/"; "/arrnode"; "/some"; "/some/child"] got;
~printer:string_of_list
["/"; "/arrnode"; "/some"; "/some/child"; "/some/child/group"] got;

(* tests for renaming nodes *)
let some = GroupNode.of_path "/some/child" in
rename_array store anode "ARRAYNODE";
rename_group store some "CHILD";
let ac, gc = find_all_nodes store in
let got =
List.fast_sort String.compare @@
List.map ArrayNode.show ac @ List.map GroupNode.show gc in
assert_equal
~printer:string_of_list
["/"; "/ARRAYNODE"; "/some"; "/some/CHILD"; "/some/CHILD/group"] got;
(* restore old array node name. *)
rename_array store (ArrayNode.of_path "/ARRAYNODE") "arrnode";

let nshape = [|25; 32; 10|] in
reshape store anode nshape;
Expand Down
3 changes: 3 additions & 0 deletions zarr-lwt/src/storage.ml
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,9 @@ module FilesystemStore = struct
| p when Sys.is_directory p ->
Either.right @@ (fspath_to_key t p) ^ "/"
| p -> Either.left @@ fspath_to_key t p) files

let rename t k k' =
Lwt_unix.rename (key_to_fspath t k) (key_to_fspath t k')
end

module U = Zarr.Util
Expand Down
19 changes: 17 additions & 2 deletions zarr-lwt/test/test_lwt.ml
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ let test_storage
assert_equal exp got)
[[`ShardingIndexed cfg]; [`Bytes BE]] >>= fun () ->

let child = GroupNode.of_path "/some/child" in
let child = GroupNode.of_path "/some/child/group" in
create_group store child >>= fun () ->
find_child_nodes store gnode >>= fun (arrays, groups) ->
assert_equal
Expand All @@ -87,7 +87,22 @@ let test_storage
List.fast_sort String.compare @@
List.map ArrayNode.show ac @ List.map GroupNode.show gc in
assert_equal
~printer:string_of_list ["/"; "/arrnode"; "/some"; "/some/child"] got;
~printer:string_of_list
["/"; "/arrnode"; "/some"; "/some/child"; "/some/child/group"] got;

(* tests for renaming nodes *)
let some = GroupNode.of_path "/some/child" in
rename_array store anode "ARRAYNODE" >>= fun () ->
rename_group store some "CHILD" >>= fun () ->
find_all_nodes store >>= fun (ac, gc) ->
let got =
List.fast_sort String.compare @@
List.map ArrayNode.show ac @ List.map GroupNode.show gc in
assert_equal
~printer:string_of_list
["/"; "/ARRAYNODE"; "/some"; "/some/CHILD"; "/some/CHILD/group"] got;
(* restore old array node name. *)
rename_array store (ArrayNode.of_path "/ARRAYNODE") "arrnode" >>= fun () ->

let nshape = [|25; 32; 10|] in
reshape store anode nshape >>= fun () ->
Expand Down
Loading
Loading