From aac1256a54a7b93b6861e1f88cd142c9f43916c4 Mon Sep 17 00:00:00 2001 From: Pierre Hoffmeister <1093398+phoffmeister@users.noreply.github.com> Date: Tue, 24 Oct 2023 10:13:40 +0200 Subject: [PATCH] adding support for symbols (#43) * adding support for symbols * format code --- src/args.rs | 8 ++ src/config.rs | 34 +++++++++ src/expected_input.rs | 165 +++++++++++++++--------------------------- src/main.rs | 6 ++ 4 files changed, 107 insertions(+), 106 deletions(-) diff --git a/src/args.rs b/src/args.rs index a346603..e48b684 100644 --- a/src/args.rs +++ b/src/args.rs @@ -19,6 +19,14 @@ pub struct Args { #[arg(long)] pub numbers_ratio: Option, + /// indicates if test should include symbols + #[arg(short, long)] + pub symbols: Option, + + /// symbols-ratio argument + #[arg(long)] + pub symbols_ratio: Option, + /// path to dictionary file #[arg(long)] pub dictionary_path: Option, diff --git a/src/config.rs b/src/config.rs index 8d2fe71..b82bd5e 100644 --- a/src/config.rs +++ b/src/config.rs @@ -7,6 +7,8 @@ //! | `duration` | `30` | number | duration of the test in seconds | //! | `numbers` | `false` | boolean | flag indicating if numbers should be inserted in expected input | //! | `numbers_ratio` | `0.05` (if numbers=TRUE) | number | ratio for putting numbers in the test | +//! | `symbols` | `false` | boolean | flag indicating if symbols should be inserted in expected input | +//! | `symbols_ratio` | `0.10` (if symbols=TRUE) | number | ratio for putting symbols in the test | //! | `uppercase` | `false` | boolean | flag indicating if uppercase letters should be inserted in expected input | //! | `uppercase_ratio` | `0.15` | boolean | ratio for putting uppercase letters in test | //! | `dictionary_path` | `None` (builtin dictionary) | string | path to file with dictionary words to sample from while creating test's expected input | @@ -64,6 +66,8 @@ pub struct Config { pub duration: Duration, pub numbers: bool, pub numbers_ratio: f64, + pub symbols: bool, + pub symbols_ratio: f64, pub dictionary_path: Option, pub uppercase: bool, pub uppercase_ratio: f64, @@ -77,6 +81,8 @@ struct ConfigFile { pub duration: Option, pub numbers: Option, pub numbers_ratio: Option, + pub symbols: Option, + pub symbols_ratio: Option, pub dictionary_path: Option, pub uppercase: Option, pub uppercase_ratio: Option, @@ -101,6 +107,8 @@ impl Config { duration: Duration::from_secs(30), numbers: false, numbers_ratio: 0.05, + symbols: false, + symbols_ratio: 0.10, dictionary_path: None, uppercase: false, uppercase_ratio: 0.15, @@ -158,6 +166,16 @@ fn augment_config_with_config_file(config: &mut Config, mut config_file: fs::Fil } } + if let Some(symbols) = config_from_file.symbols { + config.symbols = symbols; + } + + if let Some(symbols_ratio) = config_from_file.symbols_ratio { + if symbols_ratio >= 0.0 && symbols_ratio <= 1.0 { + config.symbols_ratio = symbols_ratio; + } + } + if let Some(dictionary_path) = config_from_file.dictionary_path { config.dictionary_path = Some(PathBuf::from(dictionary_path)); } @@ -217,6 +235,14 @@ fn augment_config_with_args(config: &mut Config, args: Args) { config.numbers_ratio = numbers_ratio } } + if let Some(symbols) = args.symbols { + config.symbols = symbols; + } + if let Some(symbols_ratio) = args.symbols_ratio { + if symbols_ratio >= 0.0 && symbols_ratio <= 1.0 { + config.symbols_ratio = symbols_ratio + } + } if let Some(duration) = args.duration { config.duration = Duration::from_secs(duration); } @@ -258,6 +284,8 @@ mod tests { numbers: None, numbers_ratio: None, dictionary_path: None, + symbols: None, + symbols_ratio: None, uppercase: None, uppercase_ratio: None, save_results: None, @@ -281,6 +309,8 @@ mod tests { duration: None, numbers: None, numbers_ratio: None, + symbols: None, + symbols_ratio: None, dictionary_path: None, uppercase: None, uppercase_ratio: None, @@ -301,6 +331,8 @@ mod tests { duration: Some(10), numbers: Some(true), numbers_ratio: None, + symbols: None, + symbols_ratio: None, dictionary_path: None, uppercase: None, uppercase_ratio: None, @@ -326,6 +358,8 @@ mod tests { duration: Some(20), numbers: Some(false), numbers_ratio: None, + symbols: None, + symbols_ratio: None, dictionary_path: Some(String::from("/etc/dict/words")), uppercase: None, uppercase_ratio: None, diff --git a/src/expected_input.rs b/src/expected_input.rs index 22d8c2b..35aad84 100644 --- a/src/expected_input.rs +++ b/src/expected_input.rs @@ -25,9 +25,15 @@ pub struct ExpectedInput { impl ExpectedInput { /// Create new struct instance by reading the dictionary file /// - /// After reading the file shuffle its content - /// then replace some words with numbers if specified in config - /// then save one long string to memory + /// After reading the file iterate over the words and apply the + /// specified settings. + /// + /// Each setting is applied according to the specified ratio. + /// * `uppercase` will capitalize the word. ("hello" => "Hello") + /// * `numbers` will turn each letter of a word into a random number. ( + /// "hello" => "52139") + /// * `symbols` will either append a symbol or surround the word with + /// matching symbols. ("hello" => "hello!", "hello" => "{hello}") pub fn new(config: &Config) -> Result { let mut str = dictionary::WORDS.to_string(); if let Some(dictionary_path) = &config.dictionary_path { @@ -38,85 +44,60 @@ impl ExpectedInput { .context("Unable to read dictionary file")?; } - let mut rng = thread_rng(); - let mut str_vec = str.split("\n").collect::>(); - let mut string_vec: Vec = str_vec.iter().map(|s| s.to_string()).collect(); - str_vec.shuffle(&mut rng); - - // creating a pointer which points to where the words starts in the vector to help with uppercase words since we replace - // words in the beginning with numbers when numbers are enabled. - let mut words_start_pos: usize = 0; - - if config.numbers == true { - words_start_pos = - replace_words_with_numbers(&mut string_vec, &mut rng, config.numbers_ratio); - str_vec = string_vec.iter().map(|s| s.as_str()).collect(); - } - - if config.uppercase == true { - create_uppercase_words(&mut string_vec, words_start_pos, config.uppercase_ratio) - .context("Unable to create uppercase words")?; - str_vec = string_vec.iter().map(|s| s.as_str()).collect(); - } + let ending_symbols = ['.', ',', '!', '?']; + let surrounding_symbols = ['[', ']', '{', '}', '(', ')', '"', '"', '\'', '\'']; - str_vec.shuffle(&mut rng); - - let str = str_vec.join(" ").trim().to_string(); + let mut rng = thread_rng(); + let mut str = str + .split("\n") + .map(|word| { + let mut word = word.to_string(); + + // uppercase + if config.uppercase && rng.gen::() < config.uppercase_ratio { + let mut c = word.chars(); + word = match c.next() { + None => String::new(), + Some(f) => f.to_uppercase().collect::() + c.as_str(), + }; + } + + // numbers + if config.numbers && rng.gen::() < config.numbers_ratio { + word = (0..word.len()) + .map(|_| rng.gen_range(b'0'..=b'9') as char) + .collect(); + } + + // symbols + if config.symbols && rng.gen::() < config.symbols_ratio { + word = match rng.gen::() % 2 { + 0 => { + let index = rng.gen::() % ending_symbols.len(); + format!("{}{}", word, ending_symbols[index]) + } + 1 => { + let index = (rng.gen::() % (surrounding_symbols.len() / 2)) * 2; + format!( + "{}{}{}", + surrounding_symbols[index], + word, + surrounding_symbols[index + 1] + ) + } + _ => word.to_string(), + } + } + word + }) + .collect::>(); + str.shuffle(&mut rng); + let str = str.join(" ").trim().to_string(); Ok(Self { str }) } } -/// In given vector of words replace some of them -/// -/// with words consisting only of numbers -/// number_ratio should be between [0, 1.0] -/// and tells how many percent of words should become numbers -fn replace_words_with_numbers( - string_vec: &mut Vec, - rng: &mut rand::rngs::ThreadRng, - numbers_ratio: f64, -) -> usize { - let change_to_num_threshold = (numbers_ratio * string_vec.len() as f64).round() as usize; - - *string_vec = string_vec - .iter() - .enumerate() - .map(|(index, word)| { - if index < change_to_num_threshold { - let random_digits: String = (0..word.len()) - .map(|_| rng.gen_range(b'0'..=b'9') as char) - .collect(); - return random_digits; - } - return word.to_string(); - }) - .collect(); - - return change_to_num_threshold - 1; -} - -fn create_uppercase_words( - string_vec: &mut Vec, - pos: usize, - uppercase_ratio: f64, -) -> Result<()> { - let num_uppercase_words = (uppercase_ratio * string_vec[pos..].len() as f64).round() as usize; - for i in pos..pos + num_uppercase_words { - if string_vec[i] != "" { - let mut v: Vec = string_vec[i].chars().collect(); - v[0] = v[0] - .to_uppercase() - .nth(0) - .context("Unable to get first character of a word")?; - let s: String = v.into_iter().collect(); - string_vec[i] = s; - } - } - - Ok(()) -} - /// extracted to trait to create mock with `mockall` crate #[automock] pub trait ExpectedInputInterface { @@ -162,6 +143,8 @@ mod tests { duration: Duration::from_secs(30), numbers: false, numbers_ratio: 0.05, + symbols: false, + symbols_ratio: 0.10, dictionary_path: Some(config_file.path().to_path_buf()), uppercase: false, uppercase_ratio: 0.45, @@ -192,36 +175,6 @@ mod tests { assert_eq!(expected_input.get_string(11), "abc abc abc"); } - #[test] - fn should_replace_words_with_numbers() { - let mut string_vec = vec![ - "item1".to_string(), - "item2".to_string(), - "item3".to_string(), - "item4".to_string(), - "item5".to_string(), - "item6".to_string(), - "item7".to_string(), - "item8".to_string(), - ]; - let mut rng = thread_rng(); - let numbers_ratio = 0.5; - - replace_words_with_numbers(&mut string_vec, &mut rng, numbers_ratio); - - let items_with_only_digits: Vec<&String> = string_vec - .iter() - .filter(|item| item.chars().all(|c| c.is_digit(10))) - .collect(); - - let change_to_num_threshold = (numbers_ratio * string_vec.len() as f64).round() as usize; - assert_eq!(change_to_num_threshold, 4); - assert_eq!( - items_with_only_digits.len(), - 4, - "At least 4 items contain only digits" - ); - } #[test] fn should_work_with_non_ascii_chars() { let expected_input = ExpectedInput { diff --git a/src/main.rs b/src/main.rs index b9f40f5..bdccbd8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -23,6 +23,8 @@ //! | `duration` | `30` | number | duration of the test in seconds | //! | `numbers` | `false` | boolean | flag indicating if numbers should be inserted in expected input | //! | `numbers_ratio` | `0.05` (if numbers=TRUE) | number | ratio for putting numbers in the test | +//! | `symbols` | `false` | boolean | flag indicating if symbols should be inserted in expected input | +//! | `symbols_ratio` | `0.10` (if symbols=TRUE) | number | ratio for putting symbols in the test | //! | `uppercase` | `false` | boolean | flag indicating if uppercase letters should be inserted in expected input | //! | `uppercase_ratio` | `0.15` | boolean | ratio for putting uppercase letters in test | //! | `dictionary_path` | `None` (builtin dictionary) | string | path to file with dictionary words to sample from while creating test's expected input | @@ -272,6 +274,8 @@ mod tests { uppercase: None, uppercase_ratio: None, numbers_ratio: None, + symbols: None, + symbols_ratio: None, save_results: None, history: None, }; @@ -306,6 +310,8 @@ mod tests { uppercase_ratio: None, numbers: None, numbers_ratio: None, + symbols: None, + symbols_ratio: None, save_results: None, history: None, };