-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
163 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
sample: | ||
- podar | ||
|
||
outdir: outputs.private/ | ||
|
||
private_databases: | ||
- databases/podar-ref.zip | ||
|
||
private_databases_info: | ||
- databases/podar-ref.info.csv | ||
|
||
taxonomies: | ||
- databases/podar-ref.tax.csv |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
#! /usr/bin/env python | ||
""" | ||
Copy private genomes into a new directory, properly named; create a summary | ||
CSV for genome-grist. | ||
""" | ||
import sys | ||
import argparse | ||
import screed | ||
import csv | ||
import os | ||
import shutil | ||
|
||
|
||
def main(argv=None):
    """Copy private genomes into one directory and write a summary CSV.

    For each input genome file, the name of its first sequence record is
    split at the first space into an accession and a remainder, which is
    recorded as the taxonomic name. The file is then copied to
    ``<output-directory>/<acc>_genomic.fna.gz`` and a row containing
    ``acc``, ``ncbi_tax_name`` and ``genome_filename`` is written to the
    summary CSV.

    Args:
        argv: Optional list of command-line arguments. When None, argparse
            falls back to ``sys.argv[1:]``.

    Returns:
        0 on success.

    Raises:
        ValueError: If a genome file contains no records, or if the name of
            its first record has no space separating the accession from
            the rest of the name.
    """
    p = argparse.ArgumentParser()
    p.add_argument('genome_files', nargs='+')
    p.add_argument('-o', '--output-csv', required=True)
    p.add_argument('-d', '--output-directory', required=True)
    args = p.parse_args(argv)

    # Create the directory before opening the CSV so that an output CSV
    # located inside the (new) genome directory can be opened.
    try:
        os.mkdir(args.output_directory)
    except FileExistsError:
        print(f"Genome directory '{args.output_directory}' already exists.")
    else:
        print(f"Created genome directory '{args.output_directory}'")

    print(f"Copying genomes into '{args.output_directory}'")

    n = 0
    # newline='' lets the csv module control line endings; the context
    # manager closes the file even if a genome fails part-way through.
    with open(args.output_csv, 'wt', newline='') as output_fp:
        w = csv.DictWriter(output_fp, fieldnames=['acc',
                                                  'ncbi_tax_name',
                                                  'genome_filename'])
        w.writeheader()

        for filename in args.genome_files:
            print("---")
            print(f"processing genome '{filename}'")

            # Only the first record's name is needed. Reset per file so an
            # empty file cannot silently reuse a value from the previous one.
            record_name = None
            for record in screed.open(filename):
                record_name = record.name
                break

            if record_name is None:
                raise ValueError(
                    f"no sequence records found in '{filename}'")

            acc, sep, remainder = record_name.partition(' ')
            if not sep:
                raise ValueError(
                    f"first record name '{record_name}' in '{filename}' "
                    "has no space separating accession from name")

            print(f"read identifer '{acc}' and name '{remainder}'")

            # NOTE(review): the file is copied byte-for-byte but named
            # '.fna.gz'; presumably inputs are already gzipped - confirm.
            destfile = os.path.join(args.output_directory,
                                    f"{acc}_genomic.fna.gz")
            print(f"copying '{filename}' to '{destfile}'")
            shutil.copyfile(filename, destfile)

            w.writerow(dict(acc=acc, ncbi_tax_name=remainder,
                            genome_filename=destfile))
            n += 1

    print('---')
    print(f"wrote {n} genome entries to '{args.output_csv}'")

    return 0


if __name__ == '__main__':
    sys.exit(main())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
#! /usr/bin/env python | ||
""" | ||
Scan a list of genome files and create individual "info file" CSVs | ||
for genome-grist to use for private genomes. | ||
""" | ||
import sys | ||
import argparse | ||
import screed | ||
import csv | ||
import os | ||
import shutil | ||
|
||
|
||
def main(argv=None):
    """Write one ``<acc>.info.csv`` file per row of a genome summary CSV.

    The summary CSV is read with ``csv.DictReader``; each row must provide
    the columns ``acc``, ``ncbi_tax_name`` and ``genome_filename``. For
    every accession, a two-column CSV (``acc``, ``ncbi_tax_name``) named
    ``<acc>.info.csv`` is written into the directory part of that row's
    ``genome_filename``.

    Args:
        argv: Optional list of command-line arguments. When None, argparse
            falls back to ``sys.argv[1:]``.

    Returns:
        0 on success.

    Raises:
        KeyError: If a row lacks one of the required columns.
    """
    p = argparse.ArgumentParser()
    p.add_argument('info_csv')
    args = p.parse_args(argv)

    with open(args.info_csv, 'r', newline="") as fp:
        # Keyed by accession: a later row with the same accession replaces
        # the earlier one.
        info_d = {row['acc']: row for row in csv.DictReader(fp)}

    print(f"loaded {len(info_d)} info files from '{args.info_csv}'")

    n = 0
    for acc, item_d in info_d.items():
        # The info file goes in the same directory as the genome file.
        dirname = os.path.dirname(item_d['genome_filename'])
        info_filename = os.path.join(dirname, f"{acc}.info.csv")
        name = item_d['ncbi_tax_name']

        # newline="" is required by the csv module so it can control line
        # endings itself (otherwise extra blank lines appear on Windows).
        with open(info_filename, 'wt', newline="") as fp:
            w2 = csv.DictWriter(fp, fieldnames=['acc',
                                                'ncbi_tax_name'])
            w2.writeheader()
            w2.writerow(dict(acc=acc, ncbi_tax_name=name))
        print(f"Created info CSV '{info_filename}'")

        n += 1

    print(f"wrote {n} info files.")

    return 0


if __name__ == '__main__':
    sys.exit(main())