-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: CDDG-719 etl variant and variant publish
- Loading branch information
1 parent
40a93c7
commit cebf189
Showing
6 changed files
with
158 additions
and
78 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
from datetime import datetime | ||
|
||
from airflow import DAG | ||
from airflow.models.param import Param | ||
from airflow.utils.task_group import TaskGroup | ||
|
||
from es_templates_update import es_templates_update | ||
from etl_enrich_specimens import etl_enrich_specimens | ||
from etl_enrich_variants import variant_task_enrich_variants, variant_task_enrich_consequences | ||
from etl_index import index_operator | ||
from etl_index_variants import index_variants | ||
from etl_normalize_variants import normalize_variant_operator | ||
from etl_prepare_index_variants import etl_variant_prepared | ||
from etl_publish import publish_task | ||
|
||
# Manually triggered DAG (schedule_interval=None) wiring the variant ETL:
# specimen enrichment -> normalize -> enrich -> prepared -> ES templates
# update -> index -> publish.
with DAG(
    dag_id='etl-variant',
    start_date=datetime(2022, 1, 1),
    schedule_interval=None,
    params={
        # FIXME: both 'study_code' and 'study_codes' are declared below;
        # decide which one the tasks should actually read.
        'study_code': Param('CAG', type='string'),
        'owner': Param('jmichaud', type='string'),
        'dataset': Param('dataset_default', type='string'),
        'batch': Param('annotated_vcf', type='string'),
        'release_id': Param('7', type='string'),
        'study_codes': Param('CAG', type='string'),
        'project': Param('cqdg', type='string'),
        'es_port': Param('9200', type='string'),
    },
) as dag:

    # Normalization: SNV first, then consequences.
    with TaskGroup(group_id='normalize') as normalize:
        normalize_snv = normalize_variant_operator('snv')
        normalize_consequences = normalize_variant_operator('consequences')
        normalize_snv >> normalize_consequences

    # Enrichment: variants first, then consequences.
    with TaskGroup(group_id='enrich') as enrich:
        enrich_variants = variant_task_enrich_variants()
        enrich_consequences = variant_task_enrich_consequences()
        enrich_variants >> enrich_consequences

    # Prepared datasets are built sequentially, one per target index.
    with TaskGroup(group_id='prepared') as prepared:
        (
            etl_variant_prepared('variant_centric')
            >> etl_variant_prepared('gene_centric')
            >> etl_variant_prepared('variant_suggestions')
            >> etl_variant_prepared('gene_suggestions')
        )

    with TaskGroup(group_id='index') as index:
        index_variants()

    # Stand-alone tasks, created in the same order as they appear in the chain.
    enrich_specimens = etl_enrich_specimens()
    update_templates = es_templates_update()
    # NOTE(review): these job types are the clinical indices rather than the
    # variant/gene ones prepared above -- confirm this is intended.
    publish = publish_task('study_centric,participant_centric,file_centric,biospecimen_centric')

    # End-to-end ordering of the pipeline.
    enrich_specimens >> normalize >> enrich >> prepared
    prepared >> update_templates >> index >> publish
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
from airflow import DAG | ||
from airflow.models import Param, Variable | ||
from datetime import datetime | ||
from lib.config import kube_config, es_url, es_port, es_credentials_secret_name, \ | ||
es_credentials_secret_key_password, es_credentials_secret_key_username, release_id, study_codes | ||
from lib.operators.publish import PublishConfig | ||
|
||
# Shared configuration for the publish operator. The container image is read
# from the Airflow Variable 'publish_image' when this module is imported.
etl_publish_config = PublishConfig(
    es_url=es_url,
    kube_config=kube_config,
    image=Variable.get('publish_image'),
    es_port=es_port,
    es_cert_secret_name='opensearch-ca-certificate',
    es_credentials_secret_name=es_credentials_secret_name,
    es_credentials_secret_key_username=es_credentials_secret_key_username,
    es_credentials_secret_key_password=es_credentials_secret_key_password,
)
|
||
def publish_task(job_types: str, es_node_url: str = 'https://search-workers.qa.juno.cqdg.ferlab.bio'):
    """Build the publish operator for the given job types.

    Args:
        job_types: Value passed to the publish job's ``-j`` argument (the DAG
            in this file passes a comma-separated list of index names).
        es_node_url: Value passed to the publish job's ``-n`` argument.
            Defaults to the QA URL that was previously hard-coded here, so
            existing callers are unaffected.
            NOTE(review): ``es_url`` from ``lib.config`` is presumably the
            intended environment-independent value -- confirm and switch the
            default if so.

    Returns:
        The operator produced by ``etl_publish_config`` with task id
        ``etl_publish_variant``.
    """
    publish_args = etl_publish_config.args(
        '-n', es_node_url,
        '-p', es_port,
        '-r', release_id,
        '-j', job_types,
    )
    return publish_args.operator(
        task_id='etl_publish_variant',
        name='etl-publish_variant',
    )
# Manually triggered DAG (schedule_interval=None) that runs only the publish
# step; the job types to publish come from the 'job_types' DAG param.
with DAG(
    dag_id='etl-publish-variant',
    start_date=datetime(2022, 1, 1),
    schedule_interval=None,
    params=dict(
        es_port=Param('9200', type='string'),
        release_id=Param('0', type='string'),
        job_types=Param(
            'variant_centric,variant_suggestions,gene_centric,gene_suggestions',
            type='string',
        ),
    ),
) as dag:
    # The Jinja expression is passed through as-is to publish_task.
    publish_task('{{ params.job_types }}')
|