Skip to content

Commit

Permalink
Merge pull request #357 from ClinicalGenomicsGBG/check_case_id
Browse files Browse the repository at this point in the history
added check for case id
  • Loading branch information
evasterviga authored Jul 7, 2023
2 parents 4d37b19 + 900af1b commit 38e01e0
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- MT del script to detect mitochondrial deletions [#349](https://github.com/nf-core/raredisease/pull/349)
- eKLIPse to identify large mitochondrial deletions [#365](https://github.com/nf-core/raredisease/pull/365)
- UPD+Chromograph to identify and visualize UPD sites and regions in the chromosomes [#364](https://github.com/nf-core/raredisease/pull/364) and [#366](https://github.com/nf-core/raredisease/pull/366)
- Added check for presence of case id for each sample in samplesheet [#357](https://github.com/nf-core/raredisease/pull/357)

### Fixed

Expand Down
12 changes: 12 additions & 0 deletions bin/check_samplesheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def __init__(
first_col="fastq_1",
second_col="fastq_2",
single_col="single_end",
case_id_col="case_id",
**kwargs,
):
"""
Expand All @@ -53,13 +54,16 @@ def __init__(
single_col (str): The name of the new column that will be inserted and
records whether the sample contains single- or paired-end sequencing
reads (default "single_end").
case_id_col (str): The name of the column that contains the case_id
(default "case_id").
"""
super().__init__(**kwargs)
self._sample_col = sample_col
self._first_col = first_col
self._second_col = second_col
self._single_col = single_col
self._case_id_col = case_id_col
self._seen = set()
self.modified = []

Expand All @@ -76,6 +80,7 @@ def validate_and_transform(self, row):
self._validate_first(row)
self._validate_second(row)
self._validate_pair(row)
self._validate_case_id(row)
self._seen.add((row[self._sample_col], row[self._first_col]))
self.modified.append(row)

Expand Down Expand Up @@ -108,6 +113,13 @@ def _validate_pair(self, row):
else:
row[self._single_col] = True

def _validate_case_id(self, row):
    """Assert that the case id exists and convert spaces to underscores.

    Args:
        row (dict): A mapping from column headers (keys) to elements of that
            row (values). The case id entry is sanitized in place.

    Raises:
        AssertionError: If the case id column is absent from the row, or if
            its value is None, empty, or made up of whitespace only.

    """
    case_id = row.get(self._case_id_col)
    # The field may be None (e.g. a row shorter than the header) and a
    # whitespace-only value would otherwise pass the check and end up as an
    # id consisting of underscores only; report both as a missing case id.
    if case_id is None or not case_id.strip():
        raise AssertionError("Case ID input is required.")
    # Sanitize id slightly.
    row[self._case_id_col] = case_id.replace(" ", "_")

def _validate_fastq_format(self, filename):
"""Assert that a given filename has one of the expected FASTQ extensions."""
if not any(filename.endswith(extension) for extension in self.VALID_FORMATS):
Expand Down

0 comments on commit 38e01e0

Please sign in to comment.