Skip to content

Commit

Permalink
Reduce task params memory pressure by sending seed instead of entire …
Browse files (browse the repository at this point in the history)
…RNG (in theory, this also improves output quality, because each task now seeds its own RNG and no two tasks ever start with RNGs in the same state)

Signed-off-by: Alex Saveau <[email protected]>
  • Loading branch information
SUPERCILEX committed Dec 11, 2023
1 parent aa1c405 commit 7c73745
Show file tree
Hide file tree
Showing 223 changed files with 408,710 additions and 345,066 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ Generate ~10_000 files with ~1 MB of random data spread across them:
```console
$ ftzz ./with_data -n 10K -b 1M
About 10,000 files will be generated in approximately 1,000 directories distributed across a tree of maximum depth 5 where each directory contains approximately 4 other directories. Each file will contain approximately 100 bytes of random data.
Created 9,419 files (929.4 KB) across 1,138 directories.
Created 9,652 files (960.1 KB) across 930 directories.

```

Expand Down
58 changes: 46 additions & 12 deletions src/core/file_contents.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,23 @@
use std::{fs::File, io, io::Read};

use cfg_if::cfg_if;
use rand::RngCore;
use rand::{RngCore, SeedableRng};
use rand_distr::Normal;
use rand_xoshiro::Xoshiro256PlusPlus;

use crate::{core::sample_truncated, utils::FastPathBuf};

pub trait FileContentsGenerator {
type State;

fn initialize(&self) -> Self::State;

fn create_file(
&mut self,
file: &mut FastPathBuf,
file_num: usize,
retryable: bool,
state: &mut Self::State,
) -> io::Result<u64>;

fn byte_counts_pool_return(self) -> Option<Vec<u64>>;
Expand All @@ -20,8 +26,18 @@ pub trait FileContentsGenerator {
pub struct NoGeneratedFileContents;

impl FileContentsGenerator for NoGeneratedFileContents {
type State = ();

fn initialize(&self) -> Self::State {}

#[inline]
fn create_file(&mut self, file: &mut FastPathBuf, _: usize, _: bool) -> io::Result<u64> {
fn create_file(
&mut self,
file: &mut FastPathBuf,
_: usize,
_: bool,
(): &mut Self::State,
) -> io::Result<u64> {
cfg_if! {
if #[cfg(any(not(unix), miri))] {
File::create(file).map(|_| 0)
Expand Down Expand Up @@ -59,24 +75,33 @@ impl FileContentsGenerator for NoGeneratedFileContents {
}
}

pub struct OnTheFlyGeneratedFileContents<R: RngCore> {
pub struct OnTheFlyGeneratedFileContents {
pub num_bytes_distr: Normal<f64>,
pub random: R,
pub seed: u64,
pub fill_byte: Option<u8>,
}

impl<R: RngCore + 'static> FileContentsGenerator for OnTheFlyGeneratedFileContents<R> {
impl FileContentsGenerator for OnTheFlyGeneratedFileContents {
type State = Xoshiro256PlusPlus;

fn initialize(&self) -> Self::State {
let Self { seed, .. } = *self;

Xoshiro256PlusPlus::seed_from_u64(seed)
}

#[inline]
#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
fn create_file(
&mut self,
file: &mut FastPathBuf,
file_num: usize,
retryable: bool,
random: &mut Self::State,
) -> io::Result<u64> {
let Self {
ref num_bytes_distr,
ref mut random,
seed: _,
fill_byte,
} = *self;

Expand Down Expand Up @@ -109,7 +134,7 @@ impl<R: RngCore + 'static> FileContentsGenerator for OnTheFlyGeneratedFileConten
Ok(num_bytes)
})
} else {
NoGeneratedFileContents.create_file(file, file_num, retryable)
NoGeneratedFileContents.create_file(file, file_num, retryable, &mut ())
}
}

Expand All @@ -118,23 +143,32 @@ impl<R: RngCore + 'static> FileContentsGenerator for OnTheFlyGeneratedFileConten
}
}

pub struct PreDefinedGeneratedFileContents<R: RngCore> {
pub struct PreDefinedGeneratedFileContents {
pub byte_counts: Vec<u64>,
pub random: R,
pub seed: u64,
pub fill_byte: Option<u8>,
}

impl<R: RngCore + 'static> FileContentsGenerator for PreDefinedGeneratedFileContents<R> {
impl FileContentsGenerator for PreDefinedGeneratedFileContents {
type State = Xoshiro256PlusPlus;

fn initialize(&self) -> Self::State {
let Self { seed, .. } = *self;

Xoshiro256PlusPlus::seed_from_u64(seed)
}

#[inline]
fn create_file(
&mut self,
file: &mut FastPathBuf,
file_num: usize,
retryable: bool,
random: &mut Self::State,
) -> io::Result<u64> {
let Self {
ref byte_counts,
ref mut random,
seed: _,
fill_byte,
} = *self;

Expand All @@ -144,7 +178,7 @@ impl<R: RngCore + 'static> FileContentsGenerator for PreDefinedGeneratedFileCont
.and_then(|f| write_bytes(f, num_bytes, (fill_byte, random)))
.map(|()| num_bytes)
} else {
NoGeneratedFileContents.create_file(file, file_num, retryable)
NoGeneratedFileContents.create_file(file, file_num, retryable, &mut ())
}
}

Expand Down
5 changes: 3 additions & 2 deletions src/core/files.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,14 @@ fn create_files(
file: &mut FastPathBuf,
contents: &mut impl FileContentsGenerator,
) -> Result<u64, io::Error> {
let mut state = contents.initialize();
let mut bytes_written = 0;

let mut start_file = 0;
if num_files > 0 {
with_file_name(offset, |s| file.push(s));

match contents.create_file(file, 0, true) {
match contents.create_file(file, 0, true, &mut state) {
Ok(bytes) => {
bytes_written += bytes;
start_file += 1;
Expand All @@ -104,7 +105,7 @@ fn create_files(
with_file_name(i + offset, |s| file.push(s));

bytes_written += contents
.create_file(file, i.try_into().unwrap_or(usize::MAX), false)
.create_file(file, i.try_into().unwrap_or(usize::MAX), false, &mut state)
.attach_printable_lazy(|| format!("Failed to create file {file:?}"))?;

file.pop();
Expand Down
6 changes: 3 additions & 3 deletions src/core/tasks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ impl<R: RngCore + Clone + Send + 'static> TaskGenerator for DynamicGenerator<R>
queue(
build_params!(OnTheFlyGeneratedFileContents {
num_bytes_distr,
random: random.clone(),
seed: random.next_u64(),
fill_byte,
}),
false,
Expand Down Expand Up @@ -371,7 +371,7 @@ impl<R: RngCore + Clone + Send + 'static> StaticGenerator<R> {
queue(
build_params!(PreDefinedGeneratedFileContents {
byte_counts,
random: random.clone(),
seed: random.next_u64(),
fill_byte,
}),
done,
Expand All @@ -383,7 +383,7 @@ impl<R: RngCore + Clone + Send + 'static> StaticGenerator<R> {
queue(
build_params!(OnTheFlyGeneratedFileContents {
num_bytes_distr,
random: random.clone(),
seed: random.next_u64(),
fill_byte,
}),
done,
Expand Down
2 changes: 1 addition & 1 deletion testdata/cmds/examples.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
```console
$ ftzz -n 1K -b 100 --files-exact --bytes-exact all-exact-explicit
Exactly 1,000 files will be generated in approximately 1,000 directories distributed across a tree of maximum depth 5 where each directory contains approximately 4 other directories. Each file will contain approximately 0 bytes of random data totaling exactly 100 bytes.
Created 1,000 files (100 B) across 788 directories.
Created 1,000 files (100 B) across 506 directories.

```

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
About 10,000 files will be generated in approximately 1 directory distributed across a tree of maximum depth 0 where each directory contains approximately 0 other directories. Each file will contain approximately 10 bytes of random data.
Created 9,024 files (90.2 KB) across 0 directories.
Created 9,024 files (90.8 KB) across 0 directories.

/0
/1
Expand Down Expand Up @@ -9026,4 +9026,4 @@ Created 9,024 files (90.2 KB) across 0 directories.
/998
/999

0x8167745075cde23d
0xfffb6f2d2deef4d1
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
About 10,000 files will be generated in approximately 1 directory distributed across a tree of maximum depth 0 where each directory contains approximately 0 other directories. Each file will contain approximately 10 bytes of random data.
Created 9,024 files (90.2 KB) across 0 directories.
Created 9,024 files (90.8 KB) across 0 directories.

/0
/1
Expand Down Expand Up @@ -9026,4 +9026,4 @@ Created 9,024 files (90.2 KB) across 0 directories.
/998
/999

0x8167745075cde23d
0xfffb6f2d2deef4d1
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
About 10,000 files will be generated in approximately 1 directory distributed across a tree of maximum depth 0 where each directory contains approximately 0 other directories. Each file will contain approximately 10 bytes of random data.
Created 9,024 files (90.2 KB) across 0 directories.
Created 9,024 files (90.8 KB) across 0 directories.

/0
/1
Expand Down Expand Up @@ -9026,4 +9026,4 @@ Created 9,024 files (90.2 KB) across 0 directories.
/998
/999

0x8167745075cde23d
0xfffb6f2d2deef4d1
Loading

0 comments on commit 7c73745

Please sign in to comment.