Skip to content

Commit

Permalink
add export to Dialign anchor files
Browse files Browse the repository at this point in the history
  • Loading branch information
trichter committed Oct 1, 2024
1 parent 2e832eb commit 91c1e09
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 12 deletions.
1 change: 1 addition & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ dev:
* add multiprocessing option (--njobs)
* add aggressive mode when removing contradicting anchors (now the default)
* add export to GFF files
* add export to Dialign anchor files
* change names of some options
* add no_cds configuration parameter and --no-cds command line flag,
used to find anchors in non-coding and amino acid sequences
Expand Down
32 changes: 22 additions & 10 deletions anchorna/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

from sugar import read
from anchorna.core import combine, cutout, find_my_anchors
from anchorna.io import jalview_features, read_anchors, load_selected_anchors
from anchorna.io import export_dialign, export_jalview, read_anchors, load_selected_anchors
from anchorna.util import _apply_mode


Expand Down Expand Up @@ -180,13 +180,20 @@ def _cmd_print(fname_anchor, verbose=False, mode='aa'):
def _cmd_load(fname_anchor):
_start_ipy(read_anchors(fname_anchor))

def _cmd_export(fname_anchor, out, mode='aa', score_use_fluke=None, fmt='gff'):
def _cmd_export(fname_anchor, out, mode='aa', score_use_fluke=None, fmt='gff',
fname=None):
assert mode in ('nt', 'cds', 'aa')
anchors = load_selected_anchors(fname_anchor)
if anchors.no_cds:
mode = 'aa'
if fmt == 'jalview':
outstr = jalview_features(anchors, mode=mode, score_use_fluke=score_use_fluke)
if fmt in ('jalview', 'dialign'):
if fmt == 'jalview':
outstr = export_jalview(anchors, mode=mode, score_use_fluke=score_use_fluke)
else:
if fname is None:
raise ValueError('--fname option missing for Dialign export')
seqs = read(fname)
outstr = export_dialign(anchors, seqs.ids, mode=mode, score_use_fluke=score_use_fluke)
if out is None:
print(outstr)
else:
Expand Down Expand Up @@ -318,7 +325,7 @@ def run_cmdline(cmd_args=None):
p_go = sub.add_parser('go', help=(msg:='find anchors and write them into anchor file'), description=msg)
p_print = sub.add_parser('print', help=(msg:='print contents of anchor file'), description=msg)
p_load = sub.add_parser('load', help=(msg:='load anchors into IPython session'), description=msg)
p_export = sub.add_parser('export', help=(msg:='export anchors to gff file (possibly using different mode than aa) or a JalView feature file'), description=msg)
p_export = sub.add_parser('export', help=(msg:='export anchors to gff file (possibly using different mode than aa), JalView feature file or Dialign anchor file'), description=msg)
p_view = sub.add_parser('view', help=(msg:='view anchors in JalView (i.e. call export and start JalView)'), description=msg)
msg = 'combine (and select or remove) anchors from one file or different files'
msg2 = ('An expression consists of a file name fname_anchor or fname_anchor|selection to select anchors or fname_anchor||expression to remove anchors, or fname|selection|expression. '
Expand All @@ -344,7 +351,7 @@ def run_cmdline(cmd_args=None):
p_cutout = sub.add_parser('cutout', help=msg, description=msg, epilog=msg2)


for p in (p_create, p_go, p_view, p_cutout):
for p in (p_create, p_go, p_export, p_view, p_cutout):
p.add_argument('-c', '--conf', default='anchorna.conf', help='configuration file to use (default: anchorna.conf)')
p_create.add_argument('--tutorial', action='store_true', help='copy GFF sequence file for tutorial')
p_create.add_argument('--no-cds', action='store_true', help='add the no_cds line to configuration, effect: directly use aa or nucleotide '
Expand All @@ -368,15 +375,20 @@ def run_cmdline(cmd_args=None):
g.add_argument('--no-cds', action='store_true', default=argparse.SUPPRESS)
for p in (p_export, p_view, p_cutout):
g = p.add_argument_group('optional arguments', description='Use these flags to overwrite values in the config file.')
if p != p_export:
g.add_argument('--fname', default=argparse.SUPPRESS)
g.add_argument('--fname', default=argparse.SUPPRESS)
g.add_argument('--score-use-fluke', default=argparse.SUPPRESS, type=int)
# g.add_argument('--no-cds', action='store_true', default=argparse.SUPPRESS)

p_export.add_argument('fname_anchor', help='anchor file name')
p_export.add_argument('-o', '--out', help='output file name (by default prints to stdout)')
p_export.add_argument('--fmt', help='format for export, default gff', default='gff',
choices=('gff', 'jalview'))
msg = (
'format for export, default gff, '
'for Dialign export, '
'please specify sequence filename you want to align with Dialign with --fname option, '
'the sequence order is needed for the Dialign export, default fname from config file'
)
p_export.add_argument('--fmt', help=msg, default='gff',
choices=('gff', 'jalview', 'dialign'))
p_view.add_argument('fname_anchor', help='anchor file name')
p_view.add_argument('--align', help='align sequences at given anchor')
p_combine.add_argument('fname_anchor', nargs='+', help='anchor file name')
Expand Down
30 changes: 28 additions & 2 deletions anchorna/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,14 +160,40 @@ def write_json(obj, fname):
json.dump(obj, f, cls=_AnchorJSONEncoder)


def export_dialign(anchors, seqids, mode='aa', score_use_fluke=None):
"""
Export anchors to Dialign anchor file
"""
assert mode in ('nt', 'cds', 'aa')
sortkey_score = lambda f: f.score
sortkey_ids = lambda f: seqids.index(f.seqid)
content = []
for anchor in anchors:
f0 = anchor.sort(key=sortkey_score)[-1]
start0 = _apply_mode(f0.start, f0.offset, mode)
i = sortkey_ids(f0)
anchor.sort(key=sortkey_ids)
for f in anchor:
j = sortkey_ids(f)
if (f == f0 or score_use_fluke is not None and
f.score < score_use_fluke):
continue
assert f.len == f0.len
start = _apply_mode(f.start, f.offset, mode)
content.append(
f'{i+1} {j+1} {start0+1} {start+1} {f.len} {f.score}\n'
)
return ''.join(content)


def _make_rgb_transparent(rgb, bg_rgb, alpha):
return [alpha * c1 + (1 - alpha) * c2
for (c1, c2) in zip(to_rgb(rgb), to_rgb(bg_rgb))]


def jalview_features(anchors, mode='aa', score_use_fluke=None):
def export_jalview(anchors, mode='aa', score_use_fluke=None):
"""
Write anchors to Jalview feature file
Export anchors to Jalview feature file
"""
assert mode in ('nt', 'cds', 'aa')
cols = list(mcolors.TABLEAU_COLORS.values())
Expand Down
4 changes: 4 additions & 0 deletions anchorna/tests/test_anchorna.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,10 @@ def test_anchorna_workflow_subset():
assert '' in check('anchorna export -m cds anchors.gff -o test_anchor_export.gff')
with pytest.raises(IOError):
read_anchors('test_anchor_export.gff')
read_anchors('test_anchor_export.gff', check_header=False)
assert '1' in check('anchorna export --fmt dialign anchors.gff')
assert '1' in check('anchorna export --fmt dialign -m nt anchors.gff')
assert '1' in check('anchorna export --fmt dialign -m cds anchors.gff')
with patch('subprocess.run'): # we do not want to actually start jalview here
assert '' == check('anchorna view anchors.gff')
assert '' == check('anchorna view anchors.gff --align a1')
Expand Down

0 comments on commit 91c1e09

Please sign in to comment.