diff --git a/CHANGELOG.md b/CHANGELOG.md index 074807bd..0a0f4119 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - MT del script to detect mitochondrial deletions [#349](https://github.com/nf-core/raredisease/pull/349) - eKLIPse to identify large mitochondrial deletions [#365](https://github.com/nf-core/raredisease/pull/365) - UPD+Chromograph to identify and visualize UPD sites and regions in the chromosomes [#364](https://github.com/nf-core/raredisease/pull/364) and [#366](https://github.com/nf-core/raredisease/pull/366) +- Added check for presence of case id for each sample in samplesheet [#357](https://github.com/nf-core/raredisease/pull/357) ### Fixed diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index ac46acd3..90486cb6 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -38,6 +38,7 @@ def __init__( first_col="fastq_1", second_col="fastq_2", single_col="single_end", + case_id_col="case_id", **kwargs, ): """ @@ -53,6 +54,8 @@ def __init__( single_col (str): The name of the new column that will be inserted and records whether the sample contains single- or paired-end sequencing reads (default "single_end"). + case_id_col (str): The name of the column that contains the case_id + (default "case_id"). """ super().__init__(**kwargs) @@ -60,6 +63,7 @@ def __init__( self._first_col = first_col self._second_col = second_col self._single_col = single_col + self._case_id_col = case_id_col self._seen = set() self.modified = [] @@ -76,6 +80,7 @@ def validate_and_transform(self, row): self._validate_first(row) self._validate_second(row) self._validate_pair(row) + self._validate_case_id(row) self._seen.add((row[self._sample_col], row[self._first_col])) self.modified.append(row) @@ -108,6 +113,13 @@ def _validate_pair(self, row): else: row[self._single_col] = True + def _validate_case_id(self, row): + """Assert that the case id exists and convert spaces to underscores.""" + if len(row[self._case_id_col]) <= 0: + raise AssertionError("Case ID input is required.") + # Sanitize id slightly. + row[self._case_id_col] = row[self._case_id_col].replace(" ", "_") + def _validate_fastq_format(self, filename): """Assert that a given filename has one of the expected FASTQ extensions.""" if not any(filename.endswith(extension) for extension in self.VALID_FORMATS):