Skip to content

Commit

Permalink
Merge pull request #54 from hammerlab/stop-codons-in-protein-sequence
Browse files Browse the repository at this point in the history
Trim protein sequences to not include stop codons
  • Loading branch information
iskandr committed May 27, 2016
2 parents 0e37f28 + 2ecda5d commit 1d3d8d0
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 9 deletions.
4 changes: 4 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@
# Load the README text and mirror it into a sibling ".rst" file (the
# original comment says this copy is for deploying on Travis). If any
# step fails, fall back to an empty README rather than aborting setup.
try:
    with open(readme_filename, 'r') as f:
        readme = f.read()
    # create README.rst for deploying on Travis
    rst_readme_filename = readme_filename.replace(".md", ".rst")
    # Bind the output handle explicitly: writing through `f` here would
    # target the already-closed read handle, raise ValueError, and end up
    # blanking `readme` via the fallback below.
    with open(rst_readme_filename, "w") as rst_file:
        rst_file.write(readme)
except (IOError, OSError, UnicodeError):
    # Best-effort: a missing, unreadable, or unwritable README must not
    # break the build, but unrelated errors (e.g. KeyboardInterrupt) should
    # still propagate.
    readme = ""

Expand Down
2 changes: 1 addition & 1 deletion topiary/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
)
from . import commandline_args

__version__ = '0.0.17'
__version__ = '0.0.19'

__all__ = [
"LazyLigandomeDict",
Expand Down
22 changes: 14 additions & 8 deletions topiary/sequence_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,17 @@ def protein_subsequences_around_mutations(effects, padding_around_mutation):
seq_start_offset = max(
0,
mutation_start - padding_around_mutation)
# some pseudogenes have stop codons in the reference sequence,
# if we try to use them for epitope prediction we should trim
# the sequence to not include the stop character '*'
first_stop_codon_index = protein_sequence.find("*")
if first_stop_codon_index < 0:
first_stop_codon_index = len(protein_sequence)

seq_end_offset = min(
len(protein_sequence),
first_stop_codon_index,
mutation_end + padding_around_mutation)
subsequence = protein_sequence[seq_start_offset:seq_end_offset]
print(effect, padding_around_mutation, subsequence, len(subsequence))
protein_subsequences[effect] = subsequence
protein_subsequence_start_offsets[effect] = seq_start_offset
return protein_subsequences, protein_subsequence_start_offsets
Expand All @@ -55,12 +61,12 @@ def check_padding_around_mutation(given_padding, epitope_lengths):
else:
require_integer(given_padding, "Padding around mutation")
if given_padding < min_required_padding:
raise ValueError("Padding around mutation %d cannot "
"be less than %d for epitope lengths "
"%s" % (
given_padding,
min_required_padding,
epitope_lengths))
raise ValueError(
"Padding around mutation %d cannot be less than %d "
"for epitope lengths %s" % (
given_padding,
min_required_padding,
epitope_lengths))
return given_padding

def contains_mutant_residues(
Expand Down

0 comments on commit 1d3d8d0

Please sign in to comment.