Skip to content

Commit

Permalink
Add --outgroup and --chop to fasr refine
Browse files Browse the repository at this point in the history
  • Loading branch information
wang-q committed Jul 24, 2023
1 parent fe7f1b6 commit 382a1de
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 11 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -577,7 +577,9 @@ cargo run --bin fasr consensus tests/fasr/example.fas
cargo run --bin fasr consensus tests/fasr/refine.fas
cargo run --bin fasr consensus tests/fasr/refine.fas --outgroup -p 2

cargo run --bin fasr refine tests/fasr/refine.fas
cargo run --bin fasr refine tests/fasr/example.fas
cargo run --bin fasr refine tests/fasr/example.fas --msa none --chop 10
cargo run --bin fasr refine tests/fasr/refine2.fas --msa clustalw --outgroup

cargo run --bin fasr split tests/fasr/example.fas --simple
cargo run --bin fasr split tests/fasr/example.fas -o . --chr --suffix .tmp
Expand Down
48 changes: 44 additions & 4 deletions src/cmd_fasr/refine.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use clap::*;
use crossbeam::channel::bounded;
use intspan::*;
use std::string::String;

// Create clap subcommand arguments
pub fn make_subcommand() -> Command {
Expand All @@ -15,12 +16,13 @@ pub fn make_subcommand() -> Command {
* mafft
* muscle
* clustalw
* none: means skip realigning
* For aligned files converted from .axt or .maf, we can use the `--quick` option
to align only indel-adjacent regions
* Running in parallel mode with 1 reader, 1 writer and the corresponding number of workers
* The order of output may be different from the original
* The order of blocks in output may be different from the original
"###,
)
Expand All @@ -38,6 +40,20 @@ pub fn make_subcommand() -> Command {
.default_value("clustalw")
.help("Aligning program"),
)
.arg(
Arg::new("has_outgroup")
.long("outgroup")
.action(ArgAction::SetTrue)
.help("There are outgroups at the end of each block"),
)
.arg(
Arg::new("chop")
.long("chop")
.value_parser(value_parser!(usize))
.num_args(1)
.default_value("0")
.help("Chop head and tail indels"),
)
.arg(
Arg::new("parallel")
.long("parallel")
Expand Down Expand Up @@ -89,18 +105,42 @@ fn proc_block(block: &FasBlock, args: &ArgMatches) -> anyhow::Result<String> {
// Args
//----------------------------
let msa = args.get_one::<String>("msa").unwrap();
let has_outgroup = args.get_flag("has_outgroup");
let chop = *args.get_one::<usize>("chop").unwrap();

//----------------------------
// Operating
// Realigning
//----------------------------
let mut seqs = vec![];
let mut seqs: Vec<&[u8]> = vec![];
let mut ranges = vec![];
for entry in &block.entries {
seqs.push(entry.seq().as_ref());
ranges.push(entry.range().clone());
}

let aligned = align_seqs(&seqs, msa)?;
let mut aligned = vec![];
if *msa == "none".to_string() {
for seq in seqs {
aligned.push(String::from_utf8(seq.to_vec()).unwrap());
}
} else {
aligned = align_seqs(&seqs, msa)?;
};

// String::from_utf8(record.seq().to_vec()).unwrap()

//----------------------------
// Trimming
//----------------------------
trim_pure_dash(&mut aligned);
if has_outgroup {
trim_outgroup(&mut aligned);
let _ = trim_complex_indel(&mut aligned);
}

if chop > 0 {
trim_head_tail(&mut aligned, &mut ranges, chop);
}

//----------------------------
// Output
Expand Down
90 changes: 84 additions & 6 deletions tests/cli_fasr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,72 @@ fn command_consensus() -> anyhow::Result<()> {

#[test]
fn command_refine() -> anyhow::Result<()> {
let mut cmd = Command::cargo_bin("fasr")?;
let output = cmd
.arg("refine")
.arg("tests/fasr/example.fas")
.arg("--msa")
.arg("none")
.output()
.unwrap();
let stdout = String::from_utf8(output.stdout).unwrap();

assert_eq!(stdout.lines().count(), 27);

// --parallel 2
let mut cmd = Command::cargo_bin("fasr")?;
let output = cmd
.arg("refine")
.arg("tests/fasr/example.fas")
.arg("--msa")
.arg("none")
.arg("-p")
.arg("2")
.output()
.unwrap();
let stdout = String::from_utf8(output.stdout).unwrap();

assert_eq!(stdout.lines().count(), 27);

// --parallel 2
let mut cmd = Command::cargo_bin("fasr")?;
let output = cmd
.arg("refine")
.arg("tests/fasr/refine2.fas")
.arg("--msa")
.arg("none")
.arg("-p")
.arg("2")
.output()
.unwrap();
let stdout = String::from_utf8(output.stdout).unwrap();

assert_eq!(stdout.lines().count(), 7);

// --chop 10
let mut cmd = Command::cargo_bin("fasr")?;
let output = cmd
.arg("refine")
.arg("tests/fasr/example.fas")
.arg("--msa")
.arg("none")
.arg("--chop")
.arg("10")
.output()
.unwrap();
let stdout = String::from_utf8(output.stdout).unwrap();

assert_eq!(stdout.lines().count(), 27);
assert!(stdout.contains("185276-185332"), "new header"); // 185273-185334
assert!(stdout.contains("156668-156724"), "new header"); // 156665-156726
assert!(stdout.contains("3670-3727"), "new header"); // (-):3668-3730
assert!(stdout.contains("2102-2159"), "new header"); // (-):2102-2161

Ok(())
}

#[test]
fn command_refine_msa() -> anyhow::Result<()> {
let mut bin = String::new();
for e in &["clustalw", "clustal-w", "clustalw2"] {
if let Ok(pth) = which::which(e) {
Expand All @@ -500,21 +566,33 @@ fn command_refine() -> anyhow::Result<()> {
assert_eq!(stdout.lines().count(), 18);
assert!(stdout.contains("---"), "dashes added");

// --parallel
// --outgroup
let mut cmd = Command::cargo_bin("fasr")?;
let output = cmd
.arg("refine")
.arg("tests/fasr/refine.fas")
.arg("tests/fasr/refine2.fas")
.arg("--msa")
.arg("clustalw")
.arg("--parallel")
.arg("2")
.arg("--outgroup")
.output()
.unwrap();
let stdout = String::from_utf8(output.stdout).unwrap();

assert_eq!(stdout.lines().count(), 18);
assert!(stdout.contains("---"), "dashes added");
assert_eq!(stdout.lines().count(), 7);
assert!(stdout.contains("CA-GT"), "outgroup trimmed");

let mut cmd = Command::cargo_bin("fasr")?;
let output = cmd
.arg("refine")
.arg("tests/fasr/refine2.fas")
.arg("--msa")
.arg("clustalw")
.output()
.unwrap();
let stdout = String::from_utf8(output.stdout).unwrap();

assert_eq!(stdout.lines().count(), 7);
assert!(stdout.contains("CA--GT"), "outgroup not trimmed");

Ok(())
}
Expand Down
6 changes: 6 additions & 0 deletions tests/fasr/refine2.fas
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
>S288c
TCGTCA--GTAGGTTTACCA
>RM11
TCGTCAC-GTAGGTTTACCA
>Spar
TCGTCACAGTAGG---ACCG

0 comments on commit 382a1de

Please sign in to comment.