From 7e1b4b5df307ed0196c8980e81cfc0bce8b7cb7b Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Wed, 4 Dec 2024 14:29:20 -0800 Subject: [PATCH] data-formats: Add example Python snippets for TSV handling Prompted by @jameshadfield's request Snippets are simplified versions of TSV handling in Augur. --- src/reference/data-formats.rst | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/reference/data-formats.rst b/src/reference/data-formats.rst index 2db1329..0fa62bc 100644 --- a/src/reference/data-formats.rst +++ b/src/reference/data-formats.rst @@ -30,6 +30,34 @@ When using `tsv-utils `__ | tsv-uniq -H -f strain \ | csvtk fix-quotes --tabs > output.tsv +If you are using custom Python scripts to handle TSV files, we recommend using the +`csv module `__ to read and write the files. + +.. note:: + + Be sure to follow the `csv module's recommendation `__ + to open files with ``newline=''``. + +Reading a TSV file: + +.. code-block:: Python + + with open(input_file, 'r', newline='') as handle: + reader = csv.reader(handle, delimiter='\t') + for row in reader: + ... + +Writing a TSV file: + +.. code-block:: Python + + with open(output_file, 'w', newline='') as output_handle: + tsv_writer = csv.writer(output_handle, delimiter='\t') + tsv_writer.writerow(header) + for record in records: + tsv_writer.writerow(record) + + See our internal `discussion on TSV standardization `__ for more details. JSON