From 8b00fbdd9bedcf24517c70cf0931695858211927 Mon Sep 17 00:00:00 2001
From: Jover Lee <joverlee521@gmail.com>
Date: Thu, 1 Aug 2024 16:13:19 -0700
Subject: [PATCH 1/2] Revert "write_records_to_tsv: Stop quoting output TSV"

This reverts commit 915672e9db021588349ede4e23f21003455eb705.

Per discussion in <https://github.com/nextstrain/augur/pull/1563#discussion_r1699260164>,
keep writing CSV-like TSV, where quotes may be added or removed but
the parsed values should remain equivalent.
---
 augur/io/metadata.py      | 4 +---
 tests/io/test_metadata.py | 2 +-
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/augur/io/metadata.py b/augur/io/metadata.py
index f6e0cf0b2..4cef0a65b 100644
--- a/augur/io/metadata.py
+++ b/augur/io/metadata.py
@@ -548,9 +548,7 @@ def write_records_to_tsv(records, output_file):
             output_columns,
             extrasaction='ignore',
             delimiter='\t',
-            lineterminator='\n',
-            quoting=csv.QUOTE_NONE,
-            quotechar=None,
+            lineterminator='\n'
         )
         tsv_writer.writeheader()
         tsv_writer.writerow(first_record)
diff --git a/tests/io/test_metadata.py b/tests/io/test_metadata.py
index 6ddd27e92..548db3328 100644
--- a/tests/io/test_metadata.py
+++ b/tests/io/test_metadata.py
@@ -458,7 +458,7 @@ def output_records():
 def expected_output_tsv():
     return (
         "strain\tcountry\tdate\n"
-        'SEQ_A\t"USA"\t2020-10-01\n'
+        'SEQ_A\t"""USA"""\t2020-10-01\n'
         "SEQ_T\tUSA\t2020-10-02\n"
     )
 

From ad029f60880a4b794c5ea875808c1e946f5ff4a1 Mon Sep 17 00:00:00 2001
From: Jover Lee <joverlee521@gmail.com>
Date: Thu, 1 Aug 2024 16:38:21 -0700
Subject: [PATCH 2/2] curate: fix endless additional quotes

Resolves <https://github.com/nextstrain/augur/issues/1312>

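We expect CSV-like double quoting when a field contains internal
quotes. If the field value is already correctly double quoted, then
no additional quotes should be added.

To get there, read metadata using only the detected delimiter and keep
the csv module defaults for all other format params.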
---
 augur/io/metadata.py                                        | 3 ++-
 .../curate/cram/metadata-output-with-internal-quotes.t      | 6 +++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/augur/io/metadata.py b/augur/io/metadata.py
index 4cef0a65b..80cb09cbc 100644
--- a/augur/io/metadata.py
+++ b/augur/io/metadata.py
@@ -273,7 +273,8 @@ def visible_worksheet(s: calamine.SheetMetadata) -> bool:
                 # change in a future Python version.
                 raise InvalidDelimiter from error
 
-            metadata_reader = csv.DictReader(handle, dialect=dialect)
+            # Only use the dialect delimiter and keep all other default format params
+            metadata_reader = csv.DictReader(handle, delimiter=dialect.delimiter)
 
             columns, records = metadata_reader.fieldnames, iter(metadata_reader)
 
diff --git a/tests/functional/curate/cram/metadata-output-with-internal-quotes.t b/tests/functional/curate/cram/metadata-output-with-internal-quotes.t
index 0c8541798..1e3725f7e 100644
--- a/tests/functional/curate/cram/metadata-output-with-internal-quotes.t
+++ b/tests/functional/curate/cram/metadata-output-with-internal-quotes.t
@@ -12,7 +12,7 @@ Create NDJSON with internal quotes
   > ~~
 
 Test passthru with output to TSV.
-This should not add any quotes around the field with internal quotes.
+This should wrap the field in quotes and double its internal quotes to match CSV-like quoting.
 
   $ cat records.ndjson \
   >   | ${AUGUR} curate passthru \
@@ -20,10 +20,10 @@ This should not add any quotes around the field with internal quotes.
 
   $ cat output-metadata.tsv
   strain\tsubmitting_lab (esc)
-  sequence_A\tSRC VB "Vector", Molecular Biology of Genomes (esc)
+  sequence_A\t"SRC VB ""Vector"", Molecular Biology of Genomes" (esc)
 
 Run the output TSV through augur curate passthru again.
-The new output should still be identical to the first output.
+The new output should still be identical to the first output because it is already double quoted.
 
   $ ${AUGUR} curate passthru \
   > --metadata output-metadata.tsv \