Skip to content

Commit

Permalink
fasr link --best
Browse files Browse the repository at this point in the history
  • Loading branch information
wang-q committed Jul 16, 2023
1 parent 10d7e85 commit 4babc86
Show file tree
Hide file tree
Showing 7 changed files with 105 additions and 10 deletions.
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -528,15 +528,16 @@ fasr axt2fas tests/fasr/RM11_1a.chr.sizes tests/fasr/example.axt --qname RM11_1a

fasr name tests/fasr/example.fas --count

cargo run --bin fasr cover tests/fasr/example.fas
fasr cover tests/fasr/example.fas

cargo run --bin fasr cover tests/fasr/example.fas --name S288c --trim 10
fasr cover tests/fasr/example.fas --name S288c --trim 10

fasr concat tests/fasr/name.lst tests/fasr/example.fas

fasr subset tests/fasr/name.lst tests/fasr/example.fas

fasr link tests/fasr/example.fas --pair
cargo run --bin fasr link tests/fasr/example.fas --best

samtools faidx tests/fasr/NC_000932.fa NC_000932:1-10

Expand Down
33 changes: 33 additions & 0 deletions src/cmd_fasr/link.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use clap::*;
use intspan::*;
use itertools::Itertools;

// Create clap subcommand arguments
pub fn make_subcommand() -> Command {
Expand All @@ -25,6 +26,12 @@ pub fn make_subcommand() -> Command {
.action(ArgAction::SetTrue)
.help("Bilateral (pairwise) links"),
)
.arg(
Arg::new("best")
.long("best")
.action(ArgAction::SetTrue)
.help("best-to-best bilateral links"),
)
.arg(
Arg::new("outfile")
.long("outfile")
Expand All @@ -42,6 +49,7 @@ pub fn execute(args: &ArgMatches) -> anyhow::Result<()> {
//----------------------------
let mut writer = writer(args.get_one::<String>("outfile").unwrap());
let is_pair = args.get_flag("pair");
let is_best = args.get_flag("best");

for infile in args.get_many::<String>("infiles").unwrap() {
let mut reader = reader(infile);
Expand All @@ -61,6 +69,31 @@ pub fn execute(args: &ArgMatches) -> anyhow::Result<()> {
writer.write_all(format!("{}\t{}\n", headers[i], headers[j]).as_ref())?;
}
}
} else if is_best {
let mut best_pair: Vec<(usize, usize)> = vec![];
for i in 0..headers.len() {
let mut dist_idx: (f32, usize) = (1.0, headers.len() - 1);
for j in 0..headers.len() {
if i == j {
continue;
}
let dist = pair_d(block.entries[i].seq(), block.entries[j].seq());
if dist < dist_idx.0 {
dist_idx = (dist, j);
}
}
if i < dist_idx.1 {
best_pair.push((i, dist_idx.1));
} else {
best_pair.push((dist_idx.1, i));
}
}
// from itertools
let best_pair: Vec<(usize, usize)> = best_pair.into_iter().unique().collect();

for (i, j) in best_pair {
writer.write_all(format!("{}\t{}\n", headers[i], headers[j]).as_ref())?;
}
} else {
writer.write_all(format!("{}\n", headers.join("\t")).as_ref())?;
}
Expand Down
1 change: 0 additions & 1 deletion src/fasr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ fn main() -> anyhow::Result<()> {
Ok(())
}

// TODO: fasr link --best
// TODO: replace samtools
// TODO: add more tools
// TODO: simple - replace, slice
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ extern crate lazy_static;
mod libs;
mod utils;

pub use crate::libs::alignment::*;
pub use crate::libs::coverage::*;
pub use crate::libs::fas::*;
pub use crate::libs::intspan::*;
Expand Down
45 changes: 45 additions & 0 deletions src/libs/alignment.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
use std::collections::HashSet;

lazy_static! {
static ref BASES: HashSet<u8> = vec![b'a', b'g', b'c', b't', b'A', b'G', b'C', b'T',]
.into_iter()
.collect();
}

/// Distance (D) between two sequences
///
/// ```
/// // * ** **
/// let seq1 = b"GTCTGCATGCN";
/// let seq2 = b"TTTAGCTAgc-";
/// // difference 5
/// // comparable 10
/// assert_eq!(intspan::pair_d(seq1, seq2), 0.5);
/// ```
pub fn pair_d(seq1: &[u8], seq2: &[u8]) -> f32 {
assert_eq!(
seq1.len(),
seq2.len(),
"Two sequences of different length ({}!={})",
seq1.len(),
seq2.len()
);

let mut comparable = 0;
let mut difference = 0;

for (base1, base2) in seq1.iter().zip(seq2) {
if BASES.contains(base1) && BASES.contains(base2) {
comparable += 1;
if base1.to_ascii_uppercase() != base2.to_ascii_uppercase() {
difference += 1;
}
}
}

assert_ne!(comparable, 0, "Comparable bases shouldn't be zero");

// eprintln!("{} {}", difference, comparable);

difference as f32 / comparable as f32
}
1 change: 1 addition & 0 deletions src/libs/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
pub mod alignment;
pub mod coverage;
pub mod fas;
pub mod intspan;
Expand Down
29 changes: 22 additions & 7 deletions tests/cli_fasr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -178,11 +178,7 @@ fn command_link() -> anyhow::Result<()> {
assert_eq!(stdout.lines().count(), 3);
assert_eq!(stdout.lines().next().unwrap().split_whitespace().count(), 4);

Ok(())
}

#[test]
fn command_link_pair() -> anyhow::Result<()> {
// --pair
let mut cmd = Command::cargo_bin("fasr")?;
let output = cmd
.arg("link")
Expand All @@ -195,6 +191,19 @@ fn command_link_pair() -> anyhow::Result<()> {
assert_eq!(stdout.lines().count(), 18);
assert_eq!(stdout.lines().next().unwrap().split_whitespace().count(), 2);

// --best
let mut cmd = Command::cargo_bin("fasr")?;
let output = cmd
.arg("link")
.arg("tests/fasr/example.fas")
.arg("--best")
.output()
.unwrap();
let stdout = String::from_utf8(output.stdout).unwrap();

assert_eq!(stdout.lines().count(), 9);
assert_eq!(stdout.lines().next().unwrap().split_whitespace().count(), 2);

Ok(())
}

Expand Down Expand Up @@ -394,7 +403,10 @@ fn command_join() -> anyhow::Result<()> {
let stdout = String::from_utf8(output.stdout).unwrap();

assert_eq!(stdout.lines().count(), 5);
assert!(stdout.lines().next().unwrap().contains(">Spar"), "Selected name first");
assert!(
stdout.lines().next().unwrap().contains(">Spar"),
"Selected name first"
);

let mut cmd = Command::cargo_bin("fasr")?;
let output = cmd
Expand All @@ -407,7 +419,10 @@ fn command_join() -> anyhow::Result<()> {
let stdout = String::from_utf8(output.stdout).unwrap();

assert_eq!(stdout.lines().count(), 9);
assert!(stdout.lines().next().unwrap().contains(">S288c."), "First name first");
assert!(
stdout.lines().next().unwrap().contains(">S288c."),
"First name first"
);

Ok(())
}

0 comments on commit 4babc86

Please sign in to comment.