nextstrain · victorlin · Sep 25, 2024 · Sep 25, 2024 · Sep 24, 2024 · victorlin
diff --git a/augur/__init__.py b/augur/__init__.py
@@ -14,7 +14,7 @@
 from .debug import DEBUGGING
 from .errors import AugurError
 from .io.print import print_err
-from .argparse_ import add_command_subparsers, add_default_command
+from .argparse_ import register_commands, add_default_command
 
 DEFAULT_AUGUR_RECURSION_LIMIT = 10000
 sys.setrecursionlimit(int(os.environ.get("AUGUR_RECURSION_LIMIT") or DEFAULT_AUGUR_RECURSION_LIMIT))
@@ -52,14 +52,15 @@
 
 def make_parser():
     parser = argparse.ArgumentParser(
-        prog        = "augur",
-        description = "Augur: A bioinformatics toolkit for phylogenetic analysis.")
+        prog            = "augur",
+        description     = "Augur: A bioinformatics toolkit for phylogenetic analysis.",
+        formatter_class = argparse.ArgumentDefaultsHelpFormatter,
+    )
 
     add_default_command(parser)
     add_version_alias(parser)
 
-    subparsers = parser.add_subparsers()
-    add_command_subparsers(subparsers, COMMANDS)
+    register_commands(parser, COMMANDS)
 
     return parser
 

diff --git a/augur/argparse_.py b/augur/argparse_.py
@@ -1,7 +1,8 @@
 """
 Custom helpers for the argparse standard library.
 """
-from argparse import Action, ArgumentDefaultsHelpFormatter
+import argparse
+from argparse import Action
 
 
 # Include this in an argument help string to suppress the automatic appending
@@ -30,16 +31,14 @@ def run(args):
     parser.set_defaults(__command__ = default_command)
 
 
-def add_command_subparsers(subparsers, commands, command_attribute='__command__'):
+def register_commands(parser: argparse.ArgumentParser, commands, command_attribute='__command__'):
     """
     Add subparsers for each command module.
 
     Parameters
     ----------
-    subparsers: argparse._SubParsersAction
-        The special subparsers action object created by the parent parser
-        via `parser.add_subparsers()`.
-
+    parser: argparse.ArgumentParser
+        The parser to which a subparser is added for each command.
     commands: list[types.ModuleType]
         A list of modules that are commands that require their own subparser.
         Each module is required to have a `register_parser` function to add its own
@@ -49,6 +48,8 @@ def add_command_subparsers(subparsers, commands, command_attribute='__command__'
         Optional attribute name for the commands. The default is `__command__`,
         which allows top level augur to run commands directly via `args.__command__.run()`.
     """
+    subparsers = parser.add_subparsers()
+
     for command in commands:
         # Allow each command to register its own subparser
         subparser = command.register_parser(subparsers)
@@ -57,9 +58,8 @@ def add_command_subparsers(subparsers, commands, command_attribute='__command__'
         if command_attribute:
             subparser.set_defaults(**{command_attribute: command})
 
-        # Use the same formatting class for every command for consistency.
-        # Set here to avoid repeating it in every command's register_parser().
-        subparser.formatter_class = ArgumentDefaultsHelpFormatter
+        # Inherit the parent parser's formatter so help output is consistent at every level
+        subparser.formatter_class = parser.formatter_class
 
         if not subparser.description and command.__doc__:
             subparser.description = command.__doc__
@@ -68,6 +68,10 @@ def add_command_subparsers(subparsers, commands, command_attribute='__command__'
         if not getattr(command, "run", None):
             add_default_command(subparser)
 
+        # Recursively register any subcommands
+        if getattr(subparser, "subcommands", None):
+            register_commands(subparser, subparser.subcommands)
 # Using a subcommand attribute so subcommands are not directly 
# run by top level Augur. Process I/O in `curate` so individual 
 # subcommands do not have to worry about it. 
 add_command_subparsers(subparsers, SUBCOMMANDS, SUBCOMMAND_ATTRIBUTE) 
 # Using a subcommand attribute so subcommands are not directly 
# run by top level Augur. Process I/O in `curate` so individual 
 # subcommands do not have to worry about it. 
 add_command_subparsers(subparsers, SUBCOMMANDS, SUBCOMMAND_ATTRIBUTE) 
+
 
 class HideAsFalseAction(Action):
     """

diff --git a/augur/curate/__init__.py b/augur/curate/__init__.py
@@ -1,18 +1,15 @@
 """
 A suite of commands to help with data curation.
 """
-import argparse
 import sys
 from collections import deque
 from textwrap import dedent
 from typing import Iterable, Set
 
-from augur.argparse_ import ExtendOverwriteDefault, add_command_subparsers
 from augur.errors import AugurError
 from augur.io.json import dump_ndjson, load_ndjson
-from augur.io.metadata import DEFAULT_DELIMITERS, InvalidDelimiter, read_table_to_dict, read_metadata_with_sequences, write_records_to_tsv
+from augur.io.metadata import InvalidDelimiter, read_table_to_dict, read_metadata_with_sequences, write_records_to_tsv
 from augur.io.sequences import write_records_to_fasta
-from augur.types import DataErrorMethod
 from . import format_dates, normalize_strings, passthru, titlecase, apply_geolocation_rules, apply_record_annotations, abbreviate_authors, parse_genbank_location, transform_strain_name, rename
 
 
@@ -31,93 +28,10 @@
 ]
 
 
-def create_shared_parser():
-    """
-    Creates an argparse.ArgumentParser that is intended to be used as a parent
-    parser¹ for all `augur curate` subcommands. This should include all options
-    that are intended to be shared across the subcommands.
-
-    Note that any options strings used here cannot be used in individual subcommand
-    subparsers unless the subparser specifically sets `conflict_handler='resolve'` ²,
-    then the subparser option will override the option defined here.
-
-    Based on https://stackoverflow.com/questions/23296695/permit-argparse-global-flags-after-subcommand/23296874#23296874
-
-    ¹ https://docs.python.org/3/library/argparse.html#parents
-    ² https://docs.python.org/3/library/argparse.html#conflict-handler
-    """
-    shared_parser = argparse.ArgumentParser(add_help=False)
-
-    shared_inputs = shared_parser.add_argument_group(
-        title="INPUTS",
-        description="""
-            Input options shared by all `augur curate` commands.
-            If no input options are provided, commands will try to read NDJSON records from stdin.
-        """)
-    shared_inputs.add_argument("--metadata",
-        help="Input metadata file. May be plain text (TSV, CSV) or an Excel or OpenOffice spreadsheet workbook file. When an Excel or OpenOffice workbook, only the first visible worksheet will be read and initial empty rows/columns will be ignored. Accepts '-' to read plain text from stdin.")
-    shared_inputs.add_argument("--id-column",
-        help="Name of the metadata column that contains the record identifier for reporting duplicate records. "
-             "Uses the first column of the metadata file if not provided. "
-             "Ignored if also providing a FASTA file input.")
-    shared_inputs.add_argument("--metadata-delimiters", default=DEFAULT_DELIMITERS, nargs="+", action=ExtendOverwriteDefault,
-        help="Delimiters to accept when reading a plain text metadata file. Only one delimiter will be inferred.")
-
-    shared_inputs.add_argument("--fasta",
-        help="Plain or gzipped FASTA file. Headers can only contain the sequence id used to match a metadata record. " +
-             "Note that an index file will be generated for the FASTA file as <filename>.fasta.fxi")
-    shared_inputs.add_argument("--seq-id-column",
-        help="Name of metadata column that contains the sequence id to match sequences in the FASTA file.")
-    shared_inputs.add_argument("--seq-field",
-        help="The name to use for the sequence field when joining sequences from a FASTA file.")
-
-    shared_inputs.add_argument("--unmatched-reporting",
-        type=DataErrorMethod.argtype,
-        choices=list(DataErrorMethod),
-        default=DataErrorMethod.ERROR_FIRST,
-        help="How unmatched records from combined metadata/FASTA input should be reported.")
-    shared_inputs.add_argument("--duplicate-reporting",
-        type=DataErrorMethod.argtype,
-        choices=list(DataErrorMethod),
-        default=DataErrorMethod.ERROR_FIRST,
-        help="How should duplicate records be reported.")
-
-    shared_outputs = shared_parser.add_argument_group(
-        title="OUTPUTS",
-        description="""
-            Output options shared by all `augur curate` commands.
-            If no output options are provided, commands will output NDJSON records to stdout.
-        """)
-    shared_outputs.add_argument("--output-metadata",
-        help="Output metadata TSV file. Accepts '-' to output TSV to stdout.")
-
-    shared_outputs.add_argument("--output-fasta",
-        help="Output FASTA file.")
-    shared_outputs.add_argument("--output-id-field",
-        help="The record field to use as the sequence identifier in the FASTA output.")
-    shared_outputs.add_argument("--output-seq-field",
-        help="The record field that contains the sequence for the FASTA output. "
-             "This field will be deleted from the metadata output.")
-
-    return shared_parser
-
-
 def register_parser(parent_subparsers):
-    shared_parser = create_shared_parser()
     parser = parent_subparsers.add_parser("curate", help=__doc__)
 
-    # Add print_help so we can run it when no subcommands are called
-    parser.set_defaults(print_help = parser.print_help)
-
-    # Add subparsers for subcommands
-    subparsers = parser.add_subparsers(dest="subcommand", required=False)
-    # Add the shared_parser to make it available for subcommands
-    # to include in their own parser
-    subparsers.shared_parser = shared_parser
-    # Using a subcommand attribute so subcommands are not directly
-    # run by top level Augur. Process I/O in `curate`` so individual
-    # subcommands do not have to worry about it.
-    add_command_subparsers(subparsers, SUBCOMMANDS, SUBCOMMAND_ATTRIBUTE)
+    parser.subcommands = SUBCOMMANDS
 
     return parser
 

diff --git a/augur/curate/abbreviate_authors.py b/augur/curate/abbreviate_authors.py
@@ -10,6 +10,7 @@
 from typing import Generator, List
 from augur.io.print import print_err
 from augur.utils import first_line
+from .argparse_shared_parser import shared_parser
 
 
 def parse_authors(
@@ -52,7 +53,7 @@ def register_parser(
 ) -> argparse._SubParsersAction:
     parser = parent_subparsers.add_parser(
         "abbreviate-authors",
-        parents=[parent_subparsers.shared_parser],  # type: ignore
+        parents=[shared_parser],  # type: ignore
         help=first_line(__doc__),
     )
 

diff --git a/augur/curate/apply_geolocation_rules.py b/augur/curate/apply_geolocation_rules.py
@@ -5,6 +5,7 @@
 from augur.errors import AugurError
 from augur.io.print import print_err
 from augur.utils import first_line
+from .argparse_shared_parser import shared_parser
 
 
 class CyclicGeolocationRulesError(AugurError):
@@ -188,7 +189,7 @@ def transform_geolocations(geolocation_rules, geolocation):
 
 def register_parser(parent_subparsers):
     parser = parent_subparsers.add_parser("apply-geolocation-rules",
-        parents=[parent_subparsers.shared_parser],
+        parents=[shared_parser],
         help=first_line(__doc__))
 
     parser.add_argument("--region-field", default="region",

diff --git a/augur/curate/apply_record_annotations.py b/augur/curate/apply_record_annotations.py
@@ -7,11 +7,12 @@
 from augur.errors import AugurError
 from augur.io.print import print_err
 from augur.utils import first_line
+from .argparse_shared_parser import shared_parser
 
 
 def register_parser(parent_subparsers):
     parser = parent_subparsers.add_parser("apply-record-annotations",
-        parents=[parent_subparsers.shared_parser],
+        parents=[shared_parser],
         help=first_line(__doc__))
 
     parser.add_argument("--annotations", metavar="TSV", required=True,

diff --git a/augur/curate/argparse_shared_parser.py b/augur/curate/argparse_shared_parser.py
@@ -0,0 +1,78 @@
+import argparse
+from augur.argparse_ import ExtendOverwriteDefault
+from augur.io.metadata import DEFAULT_DELIMITERS
+from augur.types import DataErrorMethod
+
+
+def create_shared_parser():
+    """
+    Creates an argparse.ArgumentParser that is intended to be used as a parent
+    parser¹ for all `augur curate` subcommands. This should include all options
+    that are intended to be shared across the subcommands.
+
+    Note that any option strings used here cannot be reused in individual subcommand
+    subparsers unless the subparser explicitly sets `conflict_handler='resolve'` ²,
+    in which case the subparser option overrides the option defined here.
+
+    Based on https://stackoverflow.com/questions/23296695/permit-argparse-global-flags-after-subcommand/23296874#23296874
+
+    ¹ https://docs.python.org/3/library/argparse.html#parents
+    ² https://docs.python.org/3/library/argparse.html#conflict-handler
+    """
+    shared_parser = argparse.ArgumentParser(add_help=False)
+
+    shared_inputs = shared_parser.add_argument_group(
+        title="INPUTS",
+        description="""
+            Input options shared by all `augur curate` commands.
+            If no input options are provided, commands will try to read NDJSON records from stdin.
+        """)
+    shared_inputs.add_argument("--metadata",
+        help="Input metadata file. May be plain text (TSV, CSV) or an Excel or OpenOffice spreadsheet workbook file. When an Excel or OpenOffice workbook, only the first visible worksheet will be read and initial empty rows/columns will be ignored. Accepts '-' to read plain text from stdin.")
+    shared_inputs.add_argument("--id-column",
+        help="Name of the metadata column that contains the record identifier for reporting duplicate records. "
+             "Uses the first column of the metadata file if not provided. "
+             "Ignored if also providing a FASTA file input.")
+    shared_inputs.add_argument("--metadata-delimiters", default=DEFAULT_DELIMITERS, nargs="+", action=ExtendOverwriteDefault,
+        help="Delimiters to accept when reading a plain text metadata file. Only one delimiter will be inferred.")
+
+    shared_inputs.add_argument("--fasta",
+        help="Plain or gzipped FASTA file. Headers can only contain the sequence id used to match a metadata record. " +
+             "Note that an index file will be generated for the FASTA file as <filename>.fasta.fxi")
+    shared_inputs.add_argument("--seq-id-column",
+        help="Name of metadata column that contains the sequence id to match sequences in the FASTA file.")
+    shared_inputs.add_argument("--seq-field",
+        help="The name to use for the sequence field when joining sequences from a FASTA file.")
+
+    shared_inputs.add_argument("--unmatched-reporting",
+        type=DataErrorMethod.argtype,
+        choices=list(DataErrorMethod),
+        default=DataErrorMethod.ERROR_FIRST,
+        help="How unmatched records from combined metadata/FASTA input should be reported.")
+    shared_inputs.add_argument("--duplicate-reporting",
+        type=DataErrorMethod.argtype,
+        choices=list(DataErrorMethod),
+        default=DataErrorMethod.ERROR_FIRST,
+        help="How should duplicate records be reported.")
+
+    shared_outputs = shared_parser.add_argument_group(
+        title="OUTPUTS",
+        description="""
+            Output options shared by all `augur curate` commands.
+            If no output options are provided, commands will output NDJSON records to stdout.
+        """)
+    shared_outputs.add_argument("--output-metadata",
+        help="Output metadata TSV file. Accepts '-' to output TSV to stdout.")
+
+    shared_outputs.add_argument("--output-fasta",
+        help="Output FASTA file.")
+    shared_outputs.add_argument("--output-id-field",
+        help="The record field to use as the sequence identifier in the FASTA output.")
+    shared_outputs.add_argument("--output-seq-field",
+        help="The record field that contains the sequence for the FASTA output. "
+             "This field will be deleted from the metadata output.")
+
+    return shared_parser
+
+
+shared_parser = create_shared_parser()
diff --git a/augur/curate/format_dates.py b/augur/curate/format_dates.py
@@ -10,6 +10,7 @@
 from augur.io.print import print_err
 from augur.types import DataErrorMethod
 from .format_dates_directives import YEAR_DIRECTIVES, YEAR_MONTH_DIRECTIVES, YEAR_MONTH_DAY_DIRECTIVES
+from .argparse_shared_parser import shared_parser
 
 
 # Default date formats that this command should parse
@@ -24,7 +25,7 @@
 
 def register_parser(parent_subparsers):
     parser = parent_subparsers.add_parser("format-dates",
-        parents=[parent_subparsers.shared_parser],
+        parents=[shared_parser],
         help=__doc__)
 
     required = parser.add_argument_group(title="REQUIRED")

diff --git a/augur/curate/normalize_strings.py b/augur/curate/normalize_strings.py
@@ -7,11 +7,12 @@
 import unicodedata
 
 from augur.utils import first_line
+from .argparse_shared_parser import shared_parser
 
 
 def register_parser(parent_subparsers):
     parser = parent_subparsers.add_parser("normalize-strings",
-        parents=[parent_subparsers.shared_parser],
+        parents=[shared_parser],
         help=first_line(__doc__))
 
     optional = parser.add_argument_group(title="OPTIONAL")

diff --git a/augur/curate/parse_genbank_location.py b/augur/curate/parse_genbank_location.py
@@ -10,6 +10,7 @@
 from typing import Generator, List
 from augur.io.print import print_err
 from augur.utils import first_line
+from .argparse_shared_parser import shared_parser
 
 
 def parse_location(
@@ -50,7 +51,7 @@ def register_parser(
 ) -> argparse._SubParsersAction:
     parser = parent_subparsers.add_parser(
         "parse-genbank-location",
-        parents=[parent_subparsers.shared_parser],  # type: ignore
+        parents=[shared_parser],  # type: ignore
         help=first_line(__doc__),
     )
 

diff --git a/augur/curate/passthru.py b/augur/curate/passthru.py
@@ -2,11 +2,12 @@
 Pass through records without doing any data transformations.
 Useful for testing, troubleshooting, or just converting file formats.
 """
+from .argparse_shared_parser import shared_parser
 
 
 def register_parser(parent_subparsers):
     return parent_subparsers.add_parser("passthru",
-        parents=[parent_subparsers.shared_parser],
+        parents=[shared_parser],
         help=__doc__)