Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pass through sbt template if optional defaults_yaml is not provided and author_list string is empty #492

Merged
merged 1 commit into from
Oct 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 56 additions & 24 deletions pipes/WDL/tasks/tasks_ncbi.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -607,7 +607,7 @@ task generate_author_sbt_file {
input {
String? author_list
File j2_template
File defaults_yaml
File? defaults_yaml
String? out_base = "authors"

String docker = "quay.io/broadinstitute/py3-bio:0.1.2"
Expand All @@ -618,7 +618,7 @@ task generate_author_sbt_file {
description: "A string containing a space-delimited list of author surnames separated by first name and (optional) middle initial. Ex. 'Lastname,Firstname, Last-hyphenated,First,M., Last,F.'"
}
j2_template: {
description: "A jinja2-format template for the sbt file expected by NCBI. Example: gs://pathogen-public-dbs/other-related/author_template.sbt.j2"
description: "an sbt file (optionally) with Jinja2 variables to be filled in based on values present in author_sbt_defaults_yaml, if provided. If no yaml is provided, this file is passed through verbatim. Example: gs://pathogen-public-dbs/other-related/author_template.sbt.j2"
}
defaults_yaml: {
description: "A YAML file with default values to use for the submitter, submitter affiliation, and author affiliation, optionally including authors to place at the start and end of the author_list. Example: gs://pathogen-public-dbs/other-related/default_sbt_values.yaml",
Expand All @@ -632,10 +632,14 @@ task generate_author_sbt_file {
command <<<
set -e

# blank yaml file to be used if the optional input is not specified
touch blank.yml

python3 << CODE
# generates an sbt file of the format returned by:
# http://www.ncbi.nlm.nih.gov/WebSub/template.cgi
import re
import shutil
# external dependencies
import yaml # pyyaml
from jinja2 import Template #jinja2
Expand All @@ -644,19 +648,34 @@ task generate_author_sbt_file {
# simple version for only initials: #author_re=re.compile(r"\s?(?P<lastname>[\w\'\-\ ]+),(?P<initials>(?:[A-Z]\.){1,3})")
author_re=re.compile(r"\s?(?P<lastname>[\w\'\-\ ]+),((?P<first>\w[\w\'\-\ ]+\.?),?|(?P<initials>(?:[A-Z]\.)+))(?P<initials_ext>(?:[A-Z]\.)*)")

authors=[]
defaults_data_last_authors=[]
defaults_data = {}

authors_affil = None
submitter = None
bioproject = None
title = None
citation = None

if defaults_yaml is not None:
with open(defaults_yaml) as defaults_yaml:
defaults_data = yaml.load(defaults_yaml, Loader=yaml.FullLoader)

authors=[]
submitter = defaults_data.get("submitter")
bioproject = defaults_data.get("bioproject")
title = defaults_data.get("title")
citation = defaults_data.get("citation")
authors_affil = defaults_data.get("authors_affil")

authors.extend(defaults_data.get("authors_start",[]))
if defaults_data is not None:
submitter = defaults_data.get("submitter")
bioproject = defaults_data.get("bioproject")
title = defaults_data.get("title")
citation = defaults_data.get("citation")
authors_affil = defaults_data.get("authors_affil")

defaults_data_authors = defaults_data.get("authors_start",[])
for author in defaults_data_authors:
authors.extend(author)

defaults_data_last_authors = defaults_data.get("authors_last",[])
for author in defaults_data_last_authors:
last_authors.append(author)

for author_match in author_re.finditer(author_string):
author = {}
Expand All @@ -680,24 +699,37 @@ task generate_author_sbt_file {
if author not in authors: # could use less exact match
authors.append(author)

for author in defaults_data.get("authors_last",[]):
for author in defaults_data_last_authors:
if author not in authors:
authors.append(author)

with open(j2_template) as sbt_template:
template = Template(sbt_template.read())
rendered = template.render( authors=authors,
authors_affil=authors_affil,
title=title,
submitter=submitter,
citation=citation,
bioproject=bioproject)
jinja_rendering_kwargs={}
if authors_affil is not None:
jinja_rendering_kwargs["authors_affil"]=authors_affil
if title is not None:
jinja_rendering_kwargs["title"]=title
if submitter is not None:
jinja_rendering_kwargs["submitter"]=submitter
if citation is not None:
jinja_rendering_kwargs["citation"]=citation
if bioproject is not None:
jinja_rendering_kwargs["bioproject"]=bioproject

if len(authors) >= 1 or len(jinja_rendering_kwargs) >= 1:
with open(j2_template) as sbt_template:
template = Template(sbt_template.read())

rendered = template.render( authors=authors,
**jinja_rendering_kwargs)

#print(rendered)
with open(sbt_out_path,"w") as sbt_out:
sbt_out.write(rendered)

render_sbt("~{author_list}", defaults_yaml="~{defaults_yaml}", sbt_out_path="~{out_base}.sbt", j2_template="~{j2_template}")
#print(rendered)
with open(sbt_out_path,"w") as sbt_out:
sbt_out.write(rendered)
else:
# if no authors were specified, simply copy the template to the output
shutil.copyfile(j2_template, sbt_out_path)

render_sbt("~{author_list}", defaults_yaml="~{default='blank.yml' defaults_yaml}", sbt_out_path="~{out_base}.sbt", j2_template="~{j2_template}")
CODE
>>>
output {
Expand Down
6 changes: 3 additions & 3 deletions pipes/WDL/workflows/genbank.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ workflow genbank {

String email_address # required for fetching data from NCBI APIs
String? author_list # of the form "Lastname,A.B., Lastname,C.,"; optional alternative to names in author_sbt_defaults_yaml
File author_sbt_defaults_yaml # defaults to fill in for author_sbt file (including both author and non-author fields)
File author_sbt_j2_template
File? author_sbt_defaults_yaml # defaults to fill in for author_sbt file (including both author and non-author fields)
File author_sbt_j2_template # an sbt file (optionally) with Jinja2 variables filled in based on author_sbt_defaults_yaml if provided
File biosample_attributes
String sequencingTech
String? comment
Expand All @@ -48,7 +48,7 @@ workflow genbank {
patterns: ["*.yaml","*.yml"]
}
author_sbt_j2_template: {
description: "A jinja2-format template for the sbt file expected by NCBI. Example: gs://pathogen-public-dbs/other-related/author_template.sbt.j2"
description: "an sbt file (optionally) with Jinja2 variables to be filled in based on values present in author_sbt_defaults_yaml, if provided. If author_list is blank and author_sbt_defaults_yaml is not provided (or is blank), this file is passed through verbatim. Example: gs://pathogen-public-dbs/other-related/author_template.sbt.j2"
}
biosample_attributes: {
description: "A post-submission attributes file from NCBI BioSample, which is available at https://submit.ncbi.nlm.nih.gov/subs/ and clicking on 'Download attributes file with BioSample accessions'.",
Expand Down
Loading