forked from mpi2/impc-etl
-
Notifications
You must be signed in to change notification settings - Fork 0
/
luigi.cfg.template_example
126 lines (114 loc) · 4.86 KB
/
luigi.cfg.template_example
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
; Shared settings. Python's configparser exposes [DEFAULT] keys as fallback
; values in every other section of the file.
; NOTE(review): %DR_TAG% looks like a placeholder for the data-release tag that
; is substituted when this template is instantiated -- confirm. A literal "%"
; left in a value would be rejected by configparser's default interpolation.
[DEFAULT]
; NOTE(review): same value as parquet_path further down -- confirm both keys are needed.
output_path=impc/%DR_TAG%/parquet/
; OpenStats PostgreSQL connection (JDBC URL, user, password).
; NOTE(review): presumably example credentials -- never commit real secrets here.
openstats_jdbc_connection=jdbc:postgresql://localhost:5432/test
openstats_db_user=impc
openstats_db_password=impc
; Data-release identifier; uses the same placeholder as the paths below.
data_release_version=%DR_TAG%
; Input directories for DCC XML files and ontologies.
dcc_xml_path=impc/%DR_TAG%/xml/
ontology_input_path=impc/%DR_TAG%/ontologies/
; iMits TSV inputs: colonies report, allele2 entries, product entries.
imits_colonies_tsv_path=impc/%DR_TAG%/imits/imits-report.tsv
imits_alleles_tsv_path=impc/%DR_TAG%/imits/allele2Entries.tsv
imits_product_tsv_path=impc/%DR_TAG%/imits/productEntries.tsv
; MGI report (.rpt) inputs.
mgi_strain_input_path=impc/%DR_TAG%/mgi/MGI_Strain.rpt
mgi_allele_input_path=impc/%DR_TAG%/mgi/MGI_PhenotypicAllele.rpt
mgi_gene_pheno_input_path=impc/%DR_TAG%/mgi/MGI_GenePheno.rpt
mgi_homologene_input_path=impc/%DR_TAG%/mgi/HGNC_homologene.rpt
mgi_mrk_list_input_path=impc/%DR_TAG%/mgi/MRK_List1.rpt
; Ontology mapping and metadata tables stored under the ontologies directory.
emap_emapa_csv_path=impc/%DR_TAG%/ontologies/EMAP-EMAPA.txt
emapa_metadata_csv_path=impc/%DR_TAG%/ontologies/emapa_metadata_table.csv
ma_metadata_csv_path=impc/%DR_TAG%/ontologies/ma_metadata_table.csv
mpath_metadata_csv_path=impc/%DR_TAG%/ontologies/mpath_metadata_table.csv
impc_search_index_csv_path=impc/%DR_TAG%/ontologies/impc_search_index.csv
mp_relation_augmented_metadata_table_csv_path=impc/%DR_TAG%/ontologies/mp-relation-augmented_metadata_table.csv
; Miscellaneous inputs stored under the misc directory.
threei_stats_results_csv=impc/%DR_TAG%/misc/flow_results_EBIexport_180119.csv
embryo_data_json_path=impc/%DR_TAG%/misc/embryo_data.json
omero_ids_csv_path=impc/%DR_TAG%/misc/impc_images_input_with_omero_ids.csv
; Proxy as host:port; the same value is repeated inside the [spark] conf string.
http_proxy=localhost:3333
use_cache=true
; NOTE(review): the set of allowed values is not visible in this file; "exclude" is the only one shown.
raw_data_in_output=exclude
; Output locations for Parquet and Solr artefacts, plus the remote host and local directory.
; NOTE(review): how remote_host and local_path are used is not visible here -- confirm against the tasks.
parquet_path=impc/%DR_TAG%/parquet/
solr_path=impc/%DR_TAG%/solr/
remote_host=hdfs-host
local_path=/nfs/nobackup/spot/mouseinformatics/impc-etl/%DR_TAG%/solr/
; GenTaR and iMits reports; all three live under the imits directory.
gentar_colonies_tsv_path=impc/%DR_TAG%/imits/gentar-phenotyping-colonies-report.tsv
gentar_gene_status_path=impc/%DR_TAG%/imits/gene_interest.tsv
imits_gene_status_path=impc/%DR_TAG%/imits/mp2_load_gene_interest_report_es_cell.tsv
; API PostgreSQL connection (JDBC URL, user, password).
; NOTE(review): presumably example credentials -- never commit real secrets here.
api_db_jdbc_connection_str=jdbc:postgresql://localhost:5432/impc_api
api_db_user=impc
api_db_password=impc
extract_windowed_data=false
; IMPReSS API endpoint and the root entity type requested from it.
impress_api_url=http://sandbox.mousephenotype.org/impressapi/
impress_root_type=pipeline
; MongoDB connection URI, database, collections and replica-set name.
; NOTE(review): the URI uses port 2701, while MongoDB's default port is 27017 -- confirm this is intended.
; "username", "password" and "mongo-host" look like placeholders to be replaced.
mongodb_connection_uri=mongodb://username:password@mongo-host:2701
mongodb_database=impc
mongodb_genes_collection=genes
mongodb_stats_collection=statistical_results
mongodb_replica_set=mongo-replica
; Luigi core settings.
; default_scheduler_url is the base URL of the central Luigi scheduler
; ("luigi-host" is presumably a placeholder to be replaced).
; The key is spelled with underscores: Luigi treats the dashed spelling as a
; deprecated alias and emits a DeprecationWarning for it.
[core]
default_scheduler_url=http://luigi-host:8082/
; Spark submission settings (presumably consumed by Luigi's Spark task wrapper -- confirm).
; All keys use the underscore spelling: Luigi treats dashed option names as a
; deprecated alias and emits a DeprecationWarning for them.
[spark]
; Comma-separated Maven coordinates: spark-xml, MySQL and PostgreSQL JDBC drivers, MongoDB Spark connector.
packages=com.databricks:spark-xml_2.12:0.12.0,mysql:mysql-connector-java:8.0.20,org.postgresql:postgresql:42.2.12,org.mongodb.spark:mongo-spark-connector_2.12:3.0.1
driver_memory=32G
driver_cores=5
master=yarn
; Path to the spark-submit launcher (a Spark 3.0.2 / Hadoop 2.7 build, per the directory name).
spark_submit=/homes/mi_hadoop/spark-3.0.2-bin-hadoop2.7/bin/spark-submit
; The same jar is listed for both jars and driver_class_path.
jars=lib/phenodcc-derived-parameters-2020.06.04.jar
driver_class_path=lib/phenodcc-derived-parameters-2020.06.04.jar
; "|"-separated list of key=value Spark properties.
; spark.driver.memoryOverhead is the current name of the setting formerly
; spelled spark.yarn.driver.memoryOverhead; it matches spark.executor.memoryOverhead.
; NOTE(review): spark.jars.packages repeats the packages= list above, and
; spark.yarn.appMasterEnv.HTTP_PROXY repeats [DEFAULT] http_proxy -- keep them in sync.
; NOTE(review): yarn.nodemanager.vmem-check-enabled is a YARN NodeManager
; setting rather than a spark.* property -- confirm it has any effect here.
conf=spark.sql.legacy.parquet.datetimeRebaseModeInWrite=LEGACY | spark.sql.legacy.timeParserPolicy=LEGACY | spark.yarn.maxAppAttempts=1 | yarn.nodemanager.vmem-check-enabled=false | spark.executor.memoryOverhead=5g | spark.driver.memoryOverhead=5g | spark.blacklist.enabled=true | spark.executorEnv.PYSPARK_PYTHON=/usr/bin/python36 | spark.yarn.appMasterEnv.PYSPARK_PYTHON=/usr/bin/python36 | spark.yarn.appMasterEnv.PYSPARK_DRIVER_PYTHON=/usr/bin/python36 | spark.yarn.appMasterEnv.HTTP_PROXY=localhost:3333 | spark.sql.session.timeZone=UTC | spark.local.dir=/scratch | spark.files.overwrite=true | spark.jars.packages=com.databricks:spark-xml_2.12:0.12.0,mysql:mysql-connector-java:8.0.20,org.postgresql:postgresql:42.2.12,org.mongodb.spark:mongo-spark-connector_2.12:3.0.1
deploy_mode=cluster
num_executors=49
executor_memory=64G
executor_cores=5
; Comma-separated Python archives shipped with the job.
py_files=dist/impc_etl.zip,dist/libs.zip
; HDFS access settings.
; NOTE(review): namenode_host differs from [DEFAULT] remote_host (hdfs-host);
; both look like placeholders -- confirm the intended host names.
[hdfs]
; NOTE(review): "webhdfs" presumably selects Luigi's WebHDFS client implementation -- confirm.
client=webhdfs
namenode_host=hdfs-host-node-host
; Settings for ImpcConfig.
; NOTE(review): deploy_mode=cluster is also set under [spark]; confirm which
; of the two the tasks actually read.
[ImpcConfig]
deploy_mode=cluster
; Sections declared without keys of their own (presumably one per Luigi task
; class -- confirm). configparser only serves [DEFAULT] fallbacks for sections
; that exist, so these headers let each name resolve the shared [DEFAULT] keys.
[ImpcIndexDaily]
[ImpcCleanDaily]
[ImpcEtl]
[ImpcSolrCores]
[ImpcStatPacketLoader]
[ImpcWindowedDataLoader]
[ImpressExtractor]
[GeneCoreLoader]
[StatsResultsCoreLoader]
[GeneProductionStatusExtractor]
[MouseNormalizer]
[ExperimentNormalizer]
[LineExperimentNormalizer]
[ExperimentBWAgeProcessor]
[ExperimentParameterDerivator]
[LineParameterDerivator]
[ImpcIndexDataRelease]
[ColonyTrackingExtractor]
[PipelineCoreLoader]
[MPChooserLoader]
[ObservationsMapper]
[ApiSpecimenMapper]
[ApiExperimentMapper]
[ApiObservationMapper]
[GenotypePhenotypeCoreLoader]
[MPCoreLoader]
[MGIPhenotypeCoreLoader]
[OntologyExtractor]
[OntologyMetadataExtractor]
[ImpcImagesCoreLoader]
[BatchQueryLoader]
[ApiPostgreSQLLoader]
[ImpcStatsBundleMapper]
[ImpcGeneBundleMapper]
[OpenStatsExtractor]
; Per-extractor input files. Each section sets one generic key (mgi_input_path
; or imits_tsv_path) to a path that also appears under a more specific key in
; [DEFAULT]; keep the two copies in sync when a path changes.
; Same path as [DEFAULT] mgi_homologene_input_path.
[MGIHomoloGeneExtractor]
mgi_input_path=impc/%DR_TAG%/mgi/HGNC_homologene.rpt
; Same path as [DEFAULT] mgi_mrk_list_input_path.
[MGIMrkListExtractor]
mgi_input_path=impc/%DR_TAG%/mgi/MRK_List1.rpt
; The next three sections all read the file named by [DEFAULT] imits_alleles_tsv_path.
[GeneExtractor]
imits_tsv_path=impc/%DR_TAG%/imits/allele2Entries.tsv
[AlleleExtractor]
imits_tsv_path=impc/%DR_TAG%/imits/allele2Entries.tsv
[Allele2Extractor]
imits_tsv_path=impc/%DR_TAG%/imits/allele2Entries.tsv
; Same path as [DEFAULT] imits_product_tsv_path.
[ProductExtractor]
imits_tsv_path=impc/%DR_TAG%/imits/productEntries.tsv