From b741e6d42830084f3caad1eb91dd64ff17de964d Mon Sep 17 00:00:00 2001 From: EmmaCAndersson Date: Tue, 30 May 2023 10:41:15 +0200 Subject: [PATCH 1/3] added check for case id --- bin/check_samplesheet.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 041b4e58..16a8ee05 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -35,6 +35,7 @@ def __init__( first_col="fastq_1", second_col="fastq_2", single_col="single_end", + case_id_col="case_id" **kwargs, ): """ @@ -50,6 +51,8 @@ def __init__( single_col (str): The name of the new column that will be inserted and records whether the sample contains single- or paired-end sequencing reads (default "single_end"). + case_id_col (str): The name of the column that contains the case_id + (default "case_id"). """ super().__init__(**kwargs) @@ -57,6 +60,7 @@ def __init__( self._first_col = first_col self._second_col = second_col self._single_col = single_col + self._case_id_col = case_id_col self._seen = set() self.modified = [] @@ -73,6 +77,7 @@ def validate_and_transform(self, row): self._validate_first(row) self._validate_second(row) self._validate_pair(row) + self._validate_case_id(row) self._seen.add((row[self._sample_col], row[self._first_col])) self.modified.append(row) @@ -105,6 +110,13 @@ def _validate_pair(self, row): else: row[self._single_col] = True + def _validate_case_id(self, row): + """Assert that the case id exists and convert spaces to underscores.""" + if len(row[self._case_id_col]) <= 0: + raise AssertionError("Case ID input is required.") + # Sanitize id slightly. 
+ row[self._case_id_col] = row[self._case_id_col].replace(" ", "_") + def _validate_fastq_format(self, filename): """Assert that a given filename has one of the expected FASTQ extensions.""" if not any(filename.endswith(extension) for extension in self.VALID_FORMATS): From d71796054c3c933a9413357daef8b221e9f39f1d Mon Sep 17 00:00:00 2001 From: EmmaCAndersson Date: Wed, 14 Jun 2023 11:02:07 +0200 Subject: [PATCH 2/3] adding a sneaky comma --- bin/check_samplesheet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 16a8ee05..d53b95cd 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -35,7 +35,7 @@ def __init__( first_col="fastq_1", second_col="fastq_2", single_col="single_end", - case_id_col="case_id" + case_id_col="case_id", **kwargs, ): """ From 900af1bffaafa48ead9b7d768f38297d64f11911 Mon Sep 17 00:00:00 2001 From: EmmaCAndersson <46917861+EmmaCAndersson@users.noreply.github.com> Date: Fri, 7 Jul 2023 13:27:05 +0200 Subject: [PATCH 3/3] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 074807bd..0a0f4119 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - MT del script to detect mitochondrial deletions [#349](https://github.com/nf-core/raredisease/pull/349) - eKLIPse to identify large mitochondrial deletions [#365](https://github.com/nf-core/raredisease/pull/365) - UPD+Chromograph to identify and visualize UPD sites and regions in the chromosomes [#364](https://github.com/nf-core/raredisease/pull/364) and [#366](https://github.com/nf-core/raredisease/pull/366) +- Added a check for the presence of a case id for each sample in the samplesheet [#357](https://github.com/nf-core/raredisease/pull/357) ### Fixed