Skip to content

Commit

Permalink
Merge pull request #392 from smoe/adding_line_numbers_to_check_dev
Browse files Browse the repository at this point in the history
check: Adding line number to error messages.
  • Loading branch information
JoseEspinosa authored Jul 3, 2024
2 parents 651b61d + 80a12ac commit c3e1861
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 14 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [[#370](https://github.com/nf-core/chipseq/issues/370)] - Fix stack overflow exceptions in phantompeakqualtools ([see here](https://github.com/kundajelab/phantompeakqualtools/issues/3)).
- [[#387](https://github.com/nf-core/chipseq/issues/387)] - Get rid of the `lib` folder and rearrange the pipeline accordingly.
- [[#385](https://github.com/nf-core/chipseq/issues/385)] - Fix `--save_unaligned` description in schema.
- [[PR #392](https://github.com/nf-core/chipseq/pull/392)] - Add line numbers to warning/error messages in `bin/check_samplesheet.py`.

### Software dependencies

Expand Down
28 changes: 14 additions & 14 deletions bin/check_samplesheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,22 +58,22 @@ def check_samplesheet(file_in, file_out):
sys.exit(1)

## Check sample entries
for line in fin:
for line_number, line in enumerate(fin, start=1):
if line.strip():
lspl = [x.strip().strip('"') for x in line.strip().split(",")]

# Check valid number of columns per row
if len(lspl) < len(HEADER):
print_error(
"Invalid number of columns (minimum = {})!".format(len(HEADER)),
"Line",
"Invalid number of columns (found = {}, minimum = {})!".format(len(lspl),len(HEADER)),
"Line {}".format(line_number),
line,
)
num_cols = len([x for x in lspl[: len(HEADER)] if x])
if num_cols < MIN_COLS:
print_error(
"Invalid number of populated columns (minimum = {})!".format(MIN_COLS),
"Line",
"Invalid number of populated columns (found = {}, minimum = {})!".format(num_cols,MIN_COLS),
"Line {}".format(line_number),
line,
)

Expand All @@ -83,23 +83,23 @@ def check_samplesheet(file_in, file_out):
print(f"WARNING: Spaces have been replaced by underscores for sample: {sample}")
sample = sample.replace(" ", "_")
if not sample:
print_error("Sample entry has not been specified!", "Line", line)
print_error("Sample entry has not been specified!", "Line {}".format(line_number), line)

## Check FastQ file extension
for fastq in [fastq_1, fastq_2]:
if fastq:
if fastq.find(" ") != -1:
print_error("FastQ file contains spaces!", "Line", line)
print_error("FastQ file contains spaces!", "Line {}".format(line_number), line)
if not fastq.endswith(".fastq.gz") and not fastq.endswith(".fq.gz"):
print_error(
"FastQ file does not have extension '.fastq.gz' or '.fq.gz'!",
"Line",
"Line {}".format(line_number),
line,
)

## Check replicate column is integer
if not replicate.isdecimal():
print_error("Replicate id not an integer!", "Line", line)
print_error("Replicate id not an integer!", "Line {}".format(line_number), line)
sys.exit(1)

## Check antibody and control columns have valid values
Expand All @@ -110,7 +110,7 @@ def check_samplesheet(file_in, file_out):
if not control:
print_error(
"Both antibody and control columns must be specified!",
"Line",
"Line {}".format(line_number),
line,
)

Expand All @@ -119,13 +119,13 @@ def check_samplesheet(file_in, file_out):
print(f"WARNING: Spaces have been replaced by underscores for control: {control}")
control = control.replace(" ", "_")
if not control_replicate.isdecimal():
print_error("Control replicate id not an integer!", "Line", line)
print_error("Control replicate id not an integer!", "Line {}".format(line_number), line)
sys.exit(1)
control = "{}_REP{}".format(control, control_replicate)
if not antibody:
print_error(
"Both antibody and control columns must be specified!",
"Line",
"Line {}".format(line_number),
line,
)

Expand All @@ -138,7 +138,7 @@ def check_samplesheet(file_in, file_out):
elif sample and fastq_1 and not fastq_2:
sample_info = ["1", fastq_1, fastq_2, replicate, antibody, control]
else:
print_error("Invalid combination of columns provided!", "Line", line)
print_error("Invalid combination of columns provided!", "Line {}".format(line_number), line)

## Create sample mapping dictionary = {sample: [[ single_end, fastq_1, fastq_2, replicate, antibody, control ]]}
replicate = int(replicate)
Expand All @@ -149,7 +149,7 @@ def check_samplesheet(file_in, file_out):
sample_mapping_dict[sample][replicate] = [sample_info]
else:
if sample_info in sample_mapping_dict[sample][replicate]:
print_error("Samplesheet contains duplicate rows!", "Line", line)
print_error("Samplesheet contains duplicate rows!", "Line {}".format(line_number), line)
else:
sample_mapping_dict[sample][replicate].append(sample_info)

Expand Down

0 comments on commit c3e1861

Please sign in to comment.