Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add an optional filter to VCF variant annotation #630

Open
wants to merge 1 commit into
base: release-5.2.x
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,10 @@ public class VariantAnnotationCommandOptions {
arity = 0)
public boolean checkAminoAcidChange;

/**
 * Value of the optional {@code --filter} command line option: the FILTER label that variants
 * must have in order to be annotated and written to the output.
 * Stays {@code null} unless the option is supplied.
 */
@Parameter(names = {"--filter"}, description = "string indicating the FILTER label that variants must have to be annotated. "
+ "Only variants with this label will be written in the output.", required = false, arity = 1)
public String filter = null;

@DynamicParameter(names = "-D", description = "Dynamic parameters. Available parameters: "
+ "{population-frequencies=for internal purposes mainly. Full path to a json file containing Variant "
+ "documents that include lists of population frequencies objects. Will allow annotating the input file "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,16 @@
package org.opencb.cellbase.app.cli.main.annotation;

import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.avro.FileEntry;
import org.opencb.biodata.models.variant.avro.StudyEntry;
import org.opencb.biodata.models.variant.avro.VariantType;
import org.opencb.cellbase.lib.variant.annotation.VariantAnnotator;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.commons.run.ParallelTaskRunner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.List;
import java.util.*;

/**
* Created by fjlopez on 11/02/16.
Expand All @@ -34,9 +36,16 @@ public class VariantAnnotatorTask implements

private final Logger logger = LoggerFactory.getLogger(this.getClass());
private List<VariantAnnotator> variantAnnotatorList;
private QueryOptions serverQueryOptions;
private static final String FILTER_PARAM = "filter";

/**
 * Creates a task for the given annotators, delegating to the two-argument constructor
 * with a new, empty {@code QueryOptions}.
 *
 * @param variantAnnotatorList annotators passed on to the two-argument constructor
 */
public VariantAnnotatorTask(List<VariantAnnotator> variantAnnotatorList) {
this(variantAnnotatorList, new QueryOptions());
}

/**
 * Creates a task from the annotators to run and the query options to consult.
 *
 * @param variantAnnotatorList annotators stored on this task
 * @param serverQueryOptions   options stored on this task; its "filter" entry, when present,
 *                             is read while filtering each batch
 */
public VariantAnnotatorTask(List<VariantAnnotator> variantAnnotatorList, QueryOptions serverQueryOptions) {
this.variantAnnotatorList = variantAnnotatorList;
this.serverQueryOptions = serverQueryOptions;
}

public void pre() {
Expand All @@ -46,18 +55,42 @@ public void pre() {
}

/**
 * Reduces the batch to the variants that should be annotated, hands that list to every
 * annotator in {@code variantAnnotatorList}, and returns it.
 *
 * @param batch variants to process
 * @return the filtered list that was passed to the annotators
 * @throws Exception declared by the signature; presumably propagated from the annotators - confirm
 */
public List<Variant> apply(List<Variant> batch) throws Exception {
// NOTE(review): the next two lines look like the removed and the added side of one diff line
// (filterReferenceBlocksOut appears to have been renamed to filter). Only one declaration of
// variantListToAnnotate can remain in compilable code - confirm against the merged file.
List<Variant> variantListToAnnotate = filterReferenceBlocksOut(batch);
List<Variant> variantListToAnnotate = filter(batch);
// Every annotator is run over the same filtered list
for (VariantAnnotator variantAnnotator : variantAnnotatorList) {
variantAnnotator.run(variantListToAnnotate);
}
return variantListToAnnotate;
}

private List<Variant> filterReferenceBlocksOut(List<Variant> variantList) {
private List<Variant> filter(List<Variant> variantList) {
List<Variant> filteredVariantList = new ArrayList<>(variantList.size());
String queryOptionsFilterValue = null;
if (serverQueryOptions != null && serverQueryOptions.containsKey(FILTER_PARAM)) {
queryOptionsFilterValue = (String) serverQueryOptions.get(FILTER_PARAM);
}
for (Variant variant : variantList) {
// Set to true once a FILTER match is found, so that the variant is not added twice.
boolean variantFound = false;
// filter out reference blocks
if (!VariantType.NO_VARIATION.equals(variant.getType())) {
filteredVariantList.add(variant);
// if FILTER param set, VCF line must match or it's skipped
if (queryOptionsFilterValue != null) {
Iterator<StudyEntry> studyIterator = variant.getImpl().getStudies().iterator();
while (studyIterator.hasNext() && !variantFound) {
for (FileEntry fileEntry : studyIterator.next().getFiles()) {
Map<String, String> attributes = fileEntry.getData();
String vcfFilterValue = attributes.get("FILTER");
if (vcfFilterValue != null && vcfFilterValue.equalsIgnoreCase(queryOptionsFilterValue)) {
// matched, variant added. we are done here.
filteredVariantList.add(variant);
variantFound = true;
break;
}
}
}
} else {
filteredVariantList.add(variant);
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,7 @@ private List<ParallelTaskRunner.TaskWithException<Variant, Variant, Exception>>

for (int i = 0; i < numThreads; i++) {
List<VariantAnnotator> variantAnnotatorList = createAnnotators();
variantAnnotatorTaskList.add(new VariantAnnotatorTask(variantAnnotatorList));
variantAnnotatorTaskList.add(new VariantAnnotatorTask(variantAnnotatorList, serverQueryOptions));
}
return variantAnnotatorTaskList;
}
Expand Down Expand Up @@ -576,6 +576,7 @@ private void checkParameters() throws IOException, CellBaseException {
leftAlign = !variantAnnotationCommandOptions.skipLeftAlign;
// Update serverQueryOptions
serverQueryOptions.put("checkAminoAcidChange", variantAnnotationCommandOptions.checkAminoAcidChange);
serverQueryOptions.put("filter", variantAnnotationCommandOptions.filter);

// output file
if (variantAnnotationCommandOptions.output != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -965,4 +965,50 @@ private void cleanUp() throws IOException {
return variantAnnotationCommandOptions;
}

/**
 * Runs the annotation three times over the same input file, first with a FILTER label that
 * matches ("PASS"), then with one that matches nothing, and finally with no filter at all,
 * checking the number of variants in the output after each run.
 */
@Test
public void testFilter() throws Exception {
    cleanUp();

    // Annotation CLI options shared by all three runs; only the filter value changes between them
    CellBaseCliOptionsParser.VariantAnnotationCommandOptions options
            = new CellBaseCliOptionsParser().getVariantAnnotationCommandOptions();
    options.assembly = "GRCh37";
    options.commonOptions.conf = resourcesFolder.resolve("commandExecutor").toString();
    options.input = resourcesFolder
            .resolve("commandExecutor/proteinChangeMatch/proband.duprem.atomic.left.split.vcf.gz")
            .toString();
    options.output = OUTPUT_FILENAME;
    options.local = true;
    options.species = "hsapiens";

    // Matching label: the one variant labelled PASS is expected in the output
    List<Variant> passResult = runAnnotationWithFilter(options, "PASS");
    assertEquals(1, passResult.size());

    // Label that no variant has: nothing is expected in the output
    List<Variant> badFilterResult = runAnnotationWithFilter(options, "BAD FILTER");
    assertEquals(0, badFilterResult.size());

    // No filter requested: the single variant is expected again
    List<Variant> unfilteredResult = runAnnotationWithFilter(options, null);
    assertEquals(1, unfilteredResult.size());
}

/**
 * Sets the filter on the given options, runs a fresh annotation executor built from them and
 * returns what {@code loadResult()} reads back afterwards.
 *
 * @param options annotation CLI options; their {@code filter} field is overwritten
 * @param filter  FILTER label to require, or {@code null} for no filter
 * @return the list produced by {@code loadResult()}
 * @throws Exception if configuration loading, execution or result loading fails
 */
private List<Variant> runAnnotationWithFilter(
        CellBaseCliOptionsParser.VariantAnnotationCommandOptions options, String filter) throws Exception {
    options.filter = filter;
    VariantAnnotationCommandExecutor executor = new VariantAnnotationCommandExecutor(options);
    executor.loadCellBaseConfiguration();
    executor.execute();
    return loadResult();
}
}