Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Register subparsers recursively #1640

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions augur/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from .debug import DEBUGGING
from .errors import AugurError
from .io.print import print_err
from .argparse_ import add_command_subparsers, add_default_command
from .argparse_ import register_commands, add_default_command

DEFAULT_AUGUR_RECURSION_LIMIT = 10000
sys.setrecursionlimit(int(os.environ.get("AUGUR_RECURSION_LIMIT") or DEFAULT_AUGUR_RECURSION_LIMIT))
Expand Down Expand Up @@ -52,14 +52,15 @@

def make_parser():
parser = argparse.ArgumentParser(
prog = "augur",
description = "Augur: A bioinformatics toolkit for phylogenetic analysis.")
prog = "augur",
description = "Augur: A bioinformatics toolkit for phylogenetic analysis.",
formatter_class = argparse.ArgumentDefaultsHelpFormatter,
)

add_default_command(parser)
add_version_alias(parser)

subparsers = parser.add_subparsers()
add_command_subparsers(subparsers, COMMANDS)
register_commands(parser, COMMANDS)

return parser

Expand Down
22 changes: 13 additions & 9 deletions augur/argparse_.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
"""
Custom helpers for the argparse standard library.
"""
from argparse import Action, ArgumentDefaultsHelpFormatter
import argparse
from argparse import Action


# Include this in an argument help string to suppress the automatic appending
Expand Down Expand Up @@ -30,16 +31,14 @@ def run(args):
parser.set_defaults(__command__ = default_command)


def add_command_subparsers(subparsers, commands, command_attribute='__command__'):
def register_commands(parser: argparse.ArgumentParser, commands, command_attribute='__command__'):
"""
Add subparsers for each command module.

Parameters
----------
subparsers: argparse._SubParsersAction
The special subparsers action object created by the parent parser
via `parser.add_subparsers()`.

parser
ArgumentParser object.
commands: list[types.ModuleType]
A list of modules that are commands that require their own subparser.
Each module is required to have a `register_parser` function to add its own
Expand All @@ -49,6 +48,8 @@ def add_command_subparsers(subparsers, commands, command_attribute='__command__'
Optional attribute name for the commands. The default is `__command__`,
which allows top level augur to run commands directly via `args.__command__.run()`.
"""
subparsers = parser.add_subparsers()

for command in commands:
# Allow each command to register its own subparser
subparser = command.register_parser(subparsers)
Expand All @@ -57,9 +58,8 @@ def add_command_subparsers(subparsers, commands, command_attribute='__command__'
if command_attribute:
subparser.set_defaults(**{command_attribute: command})

# Use the same formatting class for every command for consistency.
# Set here to avoid repeating it in every command's register_parser().
subparser.formatter_class = ArgumentDefaultsHelpFormatter
# Ensure all subparsers format like the top-level parser
subparser.formatter_class = parser.formatter_class

if not subparser.description and command.__doc__:
subparser.description = command.__doc__
Expand All @@ -68,6 +68,10 @@ def add_command_subparsers(subparsers, commands, command_attribute='__command__'
if not getattr(command, "run", None):
add_default_command(subparser)

# Recursively register any subcommands
if getattr(subparser, "subcommands", None):
register_commands(subparser, subparser.subcommands)
Comment on lines +71 to +73
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This breaks augur curate subcommands

$ cat records.ndjson | augur curate passthru
Traceback (most recent call last):
  File "…/augur/__init__.py", line 72, in run
    return args.__command__.run(args)
TypeError: run() missing 1 required positional argument: 'records'

because each subcommand's run function is expected to be called through augur curate's own run function, but it is now called directly at the top level.

Maybe recursively registering subcommands is not the right thing to do in this codebase.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I remember having to work around the top level run when initially adding the augur curate subcommand.

# Using a subcommand attribute so subcommands are not directly
# run by top level Augur. Process I/O in `curate`` so individual
# subcommands do not have to worry about it.
add_command_subparsers(subparsers, SUBCOMMANDS, SUBCOMMAND_ATTRIBUTE)

Would it be weird to keep augur curate as a special case?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Two options:

  1. Further adapt how augur curate commands share functionality so calling their run() from the top-level works. This could be done relatively straightforwardly.
  2. Don't recursively register subcommands; leave the pattern of commands with subcommands invoking register_commands(…) themselves.

I don't think either gets the way of the functional improvements we want to make? So it's a matter of preference.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the input – I'll try option 1 above to align with Nextstrain CLI code.



class HideAsFalseAction(Action):
"""
Expand Down
90 changes: 2 additions & 88 deletions augur/curate/__init__.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,15 @@
"""
A suite of commands to help with data curation.
"""
import argparse
import sys
from collections import deque
from textwrap import dedent
from typing import Iterable, Set

from augur.argparse_ import ExtendOverwriteDefault, add_command_subparsers
from augur.errors import AugurError
from augur.io.json import dump_ndjson, load_ndjson
from augur.io.metadata import DEFAULT_DELIMITERS, InvalidDelimiter, read_table_to_dict, read_metadata_with_sequences, write_records_to_tsv
from augur.io.metadata import InvalidDelimiter, read_table_to_dict, read_metadata_with_sequences, write_records_to_tsv
from augur.io.sequences import write_records_to_fasta
from augur.types import DataErrorMethod
from . import format_dates, normalize_strings, passthru, titlecase, apply_geolocation_rules, apply_record_annotations, abbreviate_authors, parse_genbank_location, transform_strain_name, rename


Expand All @@ -31,93 +28,10 @@
]


def create_shared_parser():
"""
Creates an argparse.ArgumentParser that is intended to be used as a parent
parser¹ for all `augur curate` subcommands. This should include all options
that are intended to be shared across the subcommands.

Note that any options strings used here cannot be used in individual subcommand
subparsers unless the subparser specifically sets `conflict_handler='resolve'` ²,
then the subparser option will override the option defined here.

Based on https://stackoverflow.com/questions/23296695/permit-argparse-global-flags-after-subcommand/23296874#23296874

¹ https://docs.python.org/3/library/argparse.html#parents
² https://docs.python.org/3/library/argparse.html#conflict-handler
"""
shared_parser = argparse.ArgumentParser(add_help=False)

shared_inputs = shared_parser.add_argument_group(
title="INPUTS",
description="""
Input options shared by all `augur curate` commands.
If no input options are provided, commands will try to read NDJSON records from stdin.
""")
shared_inputs.add_argument("--metadata",
help="Input metadata file. May be plain text (TSV, CSV) or an Excel or OpenOffice spreadsheet workbook file. When an Excel or OpenOffice workbook, only the first visible worksheet will be read and initial empty rows/columns will be ignored. Accepts '-' to read plain text from stdin.")
shared_inputs.add_argument("--id-column",
help="Name of the metadata column that contains the record identifier for reporting duplicate records. "
"Uses the first column of the metadata file if not provided. "
"Ignored if also providing a FASTA file input.")
shared_inputs.add_argument("--metadata-delimiters", default=DEFAULT_DELIMITERS, nargs="+", action=ExtendOverwriteDefault,
help="Delimiters to accept when reading a plain text metadata file. Only one delimiter will be inferred.")

shared_inputs.add_argument("--fasta",
help="Plain or gzipped FASTA file. Headers can only contain the sequence id used to match a metadata record. " +
"Note that an index file will be generated for the FASTA file as <filename>.fasta.fxi")
shared_inputs.add_argument("--seq-id-column",
help="Name of metadata column that contains the sequence id to match sequences in the FASTA file.")
shared_inputs.add_argument("--seq-field",
help="The name to use for the sequence field when joining sequences from a FASTA file.")

shared_inputs.add_argument("--unmatched-reporting",
type=DataErrorMethod.argtype,
choices=list(DataErrorMethod),
default=DataErrorMethod.ERROR_FIRST,
help="How unmatched records from combined metadata/FASTA input should be reported.")
shared_inputs.add_argument("--duplicate-reporting",
type=DataErrorMethod.argtype,
choices=list(DataErrorMethod),
default=DataErrorMethod.ERROR_FIRST,
help="How should duplicate records be reported.")

shared_outputs = shared_parser.add_argument_group(
title="OUTPUTS",
description="""
Output options shared by all `augur curate` commands.
If no output options are provided, commands will output NDJSON records to stdout.
""")
shared_outputs.add_argument("--output-metadata",
help="Output metadata TSV file. Accepts '-' to output TSV to stdout.")

shared_outputs.add_argument("--output-fasta",
help="Output FASTA file.")
shared_outputs.add_argument("--output-id-field",
help="The record field to use as the sequence identifier in the FASTA output.")
shared_outputs.add_argument("--output-seq-field",
help="The record field that contains the sequence for the FASTA output. "
"This field will be deleted from the metadata output.")

return shared_parser


def register_parser(parent_subparsers):
shared_parser = create_shared_parser()
parser = parent_subparsers.add_parser("curate", help=__doc__)

# Add print_help so we can run it when no subcommands are called
parser.set_defaults(print_help = parser.print_help)

# Add subparsers for subcommands
subparsers = parser.add_subparsers(dest="subcommand", required=False)
# Add the shared_parser to make it available for subcommands
# to include in their own parser
subparsers.shared_parser = shared_parser
# Using a subcommand attribute so subcommands are not directly
# run by top level Augur. Process I/O in `curate`` so individual
# subcommands do not have to worry about it.
add_command_subparsers(subparsers, SUBCOMMANDS, SUBCOMMAND_ATTRIBUTE)
parser.subcommands = SUBCOMMANDS

return parser

Expand Down
3 changes: 2 additions & 1 deletion augur/curate/abbreviate_authors.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from typing import Generator, List
from augur.io.print import print_err
from augur.utils import first_line
from .argparse_shared_parser import shared_parser


def parse_authors(
Expand Down Expand Up @@ -52,7 +53,7 @@ def register_parser(
) -> argparse._SubParsersAction:
parser = parent_subparsers.add_parser(
"abbreviate-authors",
parents=[parent_subparsers.shared_parser], # type: ignore
parents=[shared_parser], # type: ignore
help=first_line(__doc__),
)

Expand Down
3 changes: 2 additions & 1 deletion augur/curate/apply_geolocation_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from augur.errors import AugurError
from augur.io.print import print_err
from augur.utils import first_line
from .argparse_shared_parser import shared_parser


class CyclicGeolocationRulesError(AugurError):
Expand Down Expand Up @@ -188,7 +189,7 @@ def transform_geolocations(geolocation_rules, geolocation):

def register_parser(parent_subparsers):
parser = parent_subparsers.add_parser("apply-geolocation-rules",
parents=[parent_subparsers.shared_parser],
parents=[shared_parser],
help=first_line(__doc__))

parser.add_argument("--region-field", default="region",
Expand Down
3 changes: 2 additions & 1 deletion augur/curate/apply_record_annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@
from augur.errors import AugurError
from augur.io.print import print_err
from augur.utils import first_line
from .argparse_shared_parser import shared_parser


def register_parser(parent_subparsers):
parser = parent_subparsers.add_parser("apply-record-annotations",
parents=[parent_subparsers.shared_parser],
parents=[shared_parser],
help=first_line(__doc__))

parser.add_argument("--annotations", metavar="TSV", required=True,
Expand Down
78 changes: 78 additions & 0 deletions augur/curate/argparse_shared_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import argparse
from augur.argparse_ import ExtendOverwriteDefault
from augur.io.metadata import DEFAULT_DELIMITERS
from augur.types import DataErrorMethod


def create_shared_parser():
"""
Creates an argparse.ArgumentParser that is intended to be used as a parent
parser¹ for all `augur curate` subcommands. This should include all options
that are intended to be shared across the subcommands.

Note that any options strings used here cannot be used in individual subcommand
subparsers unless the subparser specifically sets `conflict_handler='resolve'` ²,
then the subparser option will override the option defined here.

Based on https://stackoverflow.com/questions/23296695/permit-argparse-global-flags-after-subcommand/23296874#23296874

¹ https://docs.python.org/3/library/argparse.html#parents
² https://docs.python.org/3/library/argparse.html#conflict-handler
"""
shared_parser = argparse.ArgumentParser(add_help=False)

shared_inputs = shared_parser.add_argument_group(
title="INPUTS",
description="""
Input options shared by all `augur curate` commands.
If no input options are provided, commands will try to read NDJSON records from stdin.
""")
shared_inputs.add_argument("--metadata",
help="Input metadata file. May be plain text (TSV, CSV) or an Excel or OpenOffice spreadsheet workbook file. When an Excel or OpenOffice workbook, only the first visible worksheet will be read and initial empty rows/columns will be ignored. Accepts '-' to read plain text from stdin.")
shared_inputs.add_argument("--id-column",
help="Name of the metadata column that contains the record identifier for reporting duplicate records. "
"Uses the first column of the metadata file if not provided. "
"Ignored if also providing a FASTA file input.")
shared_inputs.add_argument("--metadata-delimiters", default=DEFAULT_DELIMITERS, nargs="+", action=ExtendOverwriteDefault,
help="Delimiters to accept when reading a plain text metadata file. Only one delimiter will be inferred.")

shared_inputs.add_argument("--fasta",
help="Plain or gzipped FASTA file. Headers can only contain the sequence id used to match a metadata record. " +
"Note that an index file will be generated for the FASTA file as <filename>.fasta.fxi")
shared_inputs.add_argument("--seq-id-column",
help="Name of metadata column that contains the sequence id to match sequences in the FASTA file.")
shared_inputs.add_argument("--seq-field",
help="The name to use for the sequence field when joining sequences from a FASTA file.")

shared_inputs.add_argument("--unmatched-reporting",
type=DataErrorMethod.argtype,
choices=list(DataErrorMethod),
default=DataErrorMethod.ERROR_FIRST,
help="How unmatched records from combined metadata/FASTA input should be reported.")
shared_inputs.add_argument("--duplicate-reporting",
type=DataErrorMethod.argtype,
choices=list(DataErrorMethod),
default=DataErrorMethod.ERROR_FIRST,
help="How should duplicate records be reported.")

shared_outputs = shared_parser.add_argument_group(
title="OUTPUTS",
description="""
Output options shared by all `augur curate` commands.
If no output options are provided, commands will output NDJSON records to stdout.
""")
shared_outputs.add_argument("--output-metadata",
help="Output metadata TSV file. Accepts '-' to output TSV to stdout.")

shared_outputs.add_argument("--output-fasta",
help="Output FASTA file.")
shared_outputs.add_argument("--output-id-field",
help="The record field to use as the sequence identifier in the FASTA output.")
shared_outputs.add_argument("--output-seq-field",
help="The record field that contains the sequence for the FASTA output. "
"This field will be deleted from the metadata output.")

return shared_parser


shared_parser = create_shared_parser()
3 changes: 2 additions & 1 deletion augur/curate/format_dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from augur.io.print import print_err
from augur.types import DataErrorMethod
from .format_dates_directives import YEAR_DIRECTIVES, YEAR_MONTH_DIRECTIVES, YEAR_MONTH_DAY_DIRECTIVES
from .argparse_shared_parser import shared_parser


# Default date formats that this command should parse
Expand All @@ -24,7 +25,7 @@

def register_parser(parent_subparsers):
parser = parent_subparsers.add_parser("format-dates",
parents=[parent_subparsers.shared_parser],
parents=[shared_parser],
help=__doc__)

required = parser.add_argument_group(title="REQUIRED")
Expand Down
3 changes: 2 additions & 1 deletion augur/curate/normalize_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@
import unicodedata

from augur.utils import first_line
from .argparse_shared_parser import shared_parser


def register_parser(parent_subparsers):
parser = parent_subparsers.add_parser("normalize-strings",
parents=[parent_subparsers.shared_parser],
parents=[shared_parser],
help=first_line(__doc__))

optional = parser.add_argument_group(title="OPTIONAL")
Expand Down
3 changes: 2 additions & 1 deletion augur/curate/parse_genbank_location.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from typing import Generator, List
from augur.io.print import print_err
from augur.utils import first_line
from .argparse_shared_parser import shared_parser


def parse_location(
Expand Down Expand Up @@ -50,7 +51,7 @@ def register_parser(
) -> argparse._SubParsersAction:
parser = parent_subparsers.add_parser(
"parse-genbank-location",
parents=[parent_subparsers.shared_parser], # type: ignore
parents=[shared_parser], # type: ignore
help=first_line(__doc__),
)

Expand Down
3 changes: 2 additions & 1 deletion augur/curate/passthru.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@
Pass through records without doing any data transformations.
Useful for testing, troubleshooting, or just converting file formats.
"""
from .argparse_shared_parser import shared_parser


def register_parser(parent_subparsers):
return parent_subparsers.add_parser("passthru",
parents=[parent_subparsers.shared_parser],
parents=[shared_parser],
help=__doc__)


Expand Down
Loading
Loading