Skip to content

Commit

Permalink
feat!: use real BFS and spooled tempfile
Browse files Browse the repository at this point in the history
  • Loading branch information
brglng committed Aug 8, 2024
1 parent 037a203 commit 2376585
Show file tree
Hide file tree
Showing 2 changed files with 101 additions and 69 deletions.
135 changes: 82 additions & 53 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,73 +1,100 @@
use std::env;
use std::collections::{HashSet, VecDeque};
use std::fs;
use std::path::Path;
use std::path::{Path, PathBuf};
use std::process::exit;

fn depth_first_traverse(prog: &str, root: &Path, allow_hidden: bool, follow_links: bool, iter_depth: i32, depth: i32, ignores: &HashSet<String>) -> bool {
let mut has_next_level = false;
let entries = fs::read_dir(root);
if let Ok(entries) = entries {
for entry in entries {
if let Ok(entry) = entry {
let path = entry.path();
if let Some(file_name) = path.file_name() {
if let Some(file_name_str) = file_name.to_str() {
if let Some(first_char) = file_name_str.chars().next() {
let is_hidden = first_char == '.';
let is_link = path.read_link().is_ok();
if (follow_links || !is_link) && (allow_hidden || !is_hidden) && ignores.get(file_name_str).is_none() {
if depth < iter_depth {
if path.is_dir() && depth_first_traverse(prog, &entry.path(), allow_hidden, follow_links, iter_depth, depth + 1, ignores) {
has_next_level = true;
}
} else {
if path.is_dir() {
has_next_level = true;
}
println!("{}", path.display());
}
mod path_queue;
use path_queue::PathQueue;

fn breadth_first_traverse(prog: &str, roots: Vec<String>, allow_hidden: bool, follow_links: bool, ignores: &HashSet<String>) -> path_queue::Result<()> {
let dotdir = Path::new(".");

let mut q = PathQueue::new(1024 * 1024, 1024 * 512);

if roots.is_empty() {
q.push(PathBuf::from("."))?;
} else {
for root in roots {
q.push(PathBuf::from(root))?;
}
}

while let Ok(path) = q.pop() {
if !follow_links && path.is_symlink() {
continue;
}
if path != dotdir {
if let Some(file_name) = path.file_name() {
if let Some(file_name) = file_name.to_str() {
if !allow_hidden {
if let Some(first_char) = file_name.chars().next() {
if first_char == '.' {
continue;
}
}
} else {
eprintln!("{}: {}: cannot read filename", prog, path.display());
}
if ignores.get(file_name).is_some() {
continue;
}
} else {
eprintln!("{}: {}: cannot read filename", prog, path.display());
eprintln!("{}: {}: cannot read filename", prog, path.display())
}
} else if let Err(e) = entry {
eprintln!("{}: {}: {}", prog, root.display(), e);
} else {
// path ends with ".."
}
}
} else if let Err(e) = entries {
eprintln!("{}: {}: {}", prog, root.display(), e);
}
return has_next_level;
}

fn iterative_deepening(prog: &str, mut roots: Vec<String>, allow_dot: bool, follow_links: bool, max_depth: i32, ignores: HashSet<String>) {
if roots.is_empty() {
for depth in 1..=max_depth {
if !depth_first_traverse(prog, Path::new("."), allow_dot, follow_links, depth, 1, &ignores) {
break;
}
}
} else {
for depth in 1..=max_depth {
let mut i = 0_usize;
while i < roots.len() {
if !depth_first_traverse(prog, Path::new(&roots[i]), allow_dot, follow_links, depth, 1, &ignores) {
roots.remove(i);
if roots.is_empty() {
return;
} else {
continue;
let entries = fs::read_dir(&path);
if let Ok(entries) = entries {
for entry in entries {
if let Ok(entry) = entry {
let mut path = entry.path();
if follow_links && path.is_symlink() {
let p = path.read_link();
if let Ok(p) = p {
path = p;
} else {
eprintln!("{}: {}: {}", prog, path.display(), p.unwrap_err());
continue;
}
}
if let Some(file_name) = path.file_name() {
if let Some(file_name) = file_name.to_str() {
if !allow_hidden {
if let Some(first_char) = file_name.chars().next() {
if first_char == '.' {
continue;
}
}
}
if ignores.get(file_name).is_some() {
continue;
}
println!("{}", path.display());
if path.is_dir() {
if let Err(e) = q.push(path) {
eprintln!("{}: {}", prog, e);
};
}
} else {
eprintln!("{}: {}: cannot read filename", prog, path.display());
}
}
} else {
eprintln!("{}: {}: {}", prog, path.display(), entry.unwrap_err());
}
i += 1;
}
} else {
eprintln!("{}: {}: {}", prog, path.display(), entries.unwrap_err());
}

if q.len() == 0 {
break;
}
}

Ok(())
}

#[derive(PartialEq, Eq)]
Expand Down Expand Up @@ -164,5 +191,7 @@ fn main() {
}
}

iterative_deepening(prog, roots, allow_dot, follow_links, max_depth, ignores);
if let Err(e) = breadth_first_traverse(prog, roots, allow_dot, follow_links, &ignores) {
eprintln!("{}: {}", prog, e);
}
}
35 changes: 19 additions & 16 deletions src/path_queue.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
#![allow(dead_code)]

use std::error;
use std::ffi::OsStr;
use std::fmt;
use std::io;
use std::io::{BufRead, BufReader, BufWriter, Write};
use std::fs::File;
use std::os::unix::ffi::OsStrExt;
use std::path::PathBuf;
use std::sync::mpsc;
use std::sync::mpsc::{Sender, Receiver};
Expand Down Expand Up @@ -76,20 +76,22 @@ impl TempfilePathQueue {
/// Returns the number of paths currently held by this queue.
pub fn len(&self) -> usize {
    self.len
}

pub fn push(&mut self, path: PathBuf) -> Result<()> {
writeln!(self.writer, "{}", &path.display())?;
self.writer.write_all(path.as_os_str().as_bytes())?;
self.writer.write_all(b"\0")?;
self.len += 1;
return Ok(());
Ok(())
}

pub fn pop(&mut self) -> Result<PathBuf> {
let mut buffer = String::new();
let len = self.reader.read_line(&mut buffer)?;
if len == 0 {
let mut buffer = vec![];
let num_bytes = self.reader.read_until(b'\0', &mut buffer)?;
if num_bytes == 0 {
self.writer.flush()?;
self.reader.read_line(&mut buffer)?;
self.reader.read_until(b'\0', &mut buffer)?;
}
buffer.pop();
self.len -= 1;
Ok(PathBuf::from(buffer.trim_end()))
Ok(PathBuf::from(OsStr::from_bytes(&buffer)))
}
}

Expand Down Expand Up @@ -127,13 +129,14 @@ enum Storage {

// This queue stores the queue to a disk file if the queue is too large.
pub struct PathQueue {
q: Storage,
max_mem_len: usize,
q: Storage,
mem_to_tempfile_thresh: usize,
tempfile_to_mem_thresh: usize,
}

impl PathQueue {
pub fn new(max_mem_len: usize) -> Self {
PathQueue{ q: Storage::Mem(MemPathQueue::new()), max_mem_len }
pub fn new(mem_to_tempfile_thresh: usize, tempfile_to_mem_thresh: usize) -> Self {
PathQueue{ q: Storage::Mem(MemPathQueue::new()), mem_to_tempfile_thresh, tempfile_to_mem_thresh }
}

pub fn len(&self) -> usize {
Expand Down Expand Up @@ -162,7 +165,7 @@ impl PathQueue {
pub fn push(&mut self, path: PathBuf) -> Result<()> {
match self.q {
Storage::Mem(ref mut memq) => {
if memq.len() < self.max_mem_len {
if memq.len() < self.mem_to_tempfile_thresh {
return memq.push(path);
} else {
let mut tempfileq = TempfilePathQueue::new()?;
Expand All @@ -187,7 +190,7 @@ impl PathQueue {
},
Storage::Tempfile(ref mut tempfileq) => {
let path = tempfileq.pop()?;
if tempfileq.len() < self.max_mem_len / 2 {
if tempfileq.len() < self.tempfile_to_mem_thresh {
let mut memq = MemPathQueue::new();
for _ in 0..tempfileq.len() {
memq.push(tempfileq.pop()?)?;
Expand All @@ -207,7 +210,7 @@ mod tests {

#[test]
fn path_queue() -> Result<()> {
let mut q = PathQueue::new(4);
let mut q = PathQueue::new(4, 2);
q.push(PathBuf::from("a/b"))?;
q.push(PathBuf::from("b/c"))?;
q.push(PathBuf::from("c/d"))?;
Expand Down

0 comments on commit 2376585

Please sign in to comment.