-
Notifications
You must be signed in to change notification settings - Fork 1
/
docker-compose.yml
182 lines (166 loc) · 5.33 KB
/
docker-compose.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
---
# Local development stack for the data-inclusion pipeline:
# Airflow (LocalExecutor + Postgres metadata DB), a MinIO S3-compatible
# data lake, the target datawarehouse, and the data-inclusion API.
#
# NOTE: all credentials below (airflow/airflow, minioadmin, data-inclusion,
# USE_IN_DEVELOPMENT_ONLY) are development-only defaults.

# Shared configuration merged (<<:) into every Airflow service below.
x-airflow-common: &airflow-common
  environment: &airflow-common-environment
    AIRFLOW__CORE__DEFAULT_TIMEZONE: Europe/Paris
    AIRFLOW__CORE__EXECUTOR: LocalExecutor
    # AIRFLOW__CORE__FERNET_KEY:
    AIRFLOW__CORE__LOAD_EXAMPLES: 'false'
    AIRFLOW__CORE__LOAD_DEFAULT_CONNECTIONS: 'false'
    # Numeric settings are quoted: Airflow reads env vars as strings,
    # and quoting avoids YAML integer typing surprises.
    AIRFLOW__CORE__MAX_MAP_LENGTH: '2048'
    AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql://airflow:airflow@airflow-db:5432/airflow
    AIRFLOW__SCHEDULER__MIN_FILE_PROCESS_INTERVAL: '60'
    AIRFLOW__SCHEDULER__PARSING_PROCESSES: '4'
    AIRFLOW__CORE__PARALLELISM: '4'
    AIRFLOW__SCHEDULER__MAX_TIS_PER_QUERY: '0'
    AIRFLOW__CORE__DEFAULT_POOL_TASK_SLOT_COUNT: '24'
    AIRFLOW__WEBSERVER__SECRET_KEY: USE_IN_DEVELOPMENT_ONLY
    AIRFLOW__WEBSERVER__WORKERS: '1'
    # Connections
    AIRFLOW_CONN_PG: postgresql://data-inclusion:data-inclusion@target-db:5432/data-inclusion
    AIRFLOW_CONN_S3: aws://@/data-inclusion-lake?endpoint_url=http%3A%2F%2Fminio%3A9000&aws_access_key_id=minioadmin&aws_secret_access_key=minioadmin
    AIRFLOW_CONN_S3_SOURCES: ${AIRFLOW_CONN_S3_SOURCES}
    AIRFLOW_CONN_SSH_API: ${AIRFLOW_CONN_SSH_API}
    AIRFLOW_CONN_PG_API: ${AIRFLOW_CONN_PG_API}
    # Variables
    AIRFLOW_VAR_BREVO_API_KEY: ${AIRFLOW_VAR_BREVO_API_KEY}
    AIRFLOW_VAR_DATAGOUV_API_KEY: ${AIRFLOW_VAR_DATAGOUV_API_KEY}
    AIRFLOW_VAR_DORA_API_TOKEN: ${AIRFLOW_VAR_DORA_API_TOKEN}
    AIRFLOW_VAR_FREDO_API_TOKEN: ${AIRFLOW_VAR_FREDO_API_TOKEN}
    AIRFLOW_VAR_FT_API_TOKEN: ${AIRFLOW_VAR_FT_API_TOKEN}
    AIRFLOW_VAR_DORA_PREPROD_API_TOKEN: ${AIRFLOW_VAR_DORA_PREPROD_API_TOKEN}
    AIRFLOW_VAR_EMPLOIS_API_TOKEN: ${AIRFLOW_VAR_EMPLOIS_API_TOKEN}
    AIRFLOW_VAR_MES_AIDES_AIRTABLE_KEY: ${AIRFLOW_VAR_MES_AIDES_AIRTABLE_KEY}
    AIRFLOW_VAR_SOLIGUIDE_API_TOKEN: ${AIRFLOW_VAR_SOLIGUIDE_API_TOKEN}
    AIRFLOW_VAR_TWOCAPTCHA_API_KEY: ${AIRFLOW_VAR_TWOCAPTCHA_API_KEY}
  # Mount dbt project, DAGs and sources for live-reload during development.
  volumes:
    - ./pipeline/dbt:/opt/airflow/dbt
    - ./pipeline/dags:/opt/airflow/dags
    - ./pipeline/src:/opt/airflow/src
  user: ${AIRFLOW_UID:-50000}:0
  depends_on: &airflow-common-depends-on
    airflow-db:
      condition: service_healthy

services:
  # Airflow metadata database.
  airflow-db:
    image: postgres:14
    # "no" must be quoted: bare `no` is YAML boolean false, while the
    # Compose restart policy must be a string.
    restart: "no"
    healthcheck:
      test: ["CMD", "pg_isready", "-U", "airflow"]
      interval: 5s
      retries: 5
    ports:
      - "${AIRFLOW_DB_PORT:-5454}:5432"
    environment:
      - POSTGRES_DB=airflow
      - POSTGRES_USER=airflow
      - POSTGRES_PASSWORD=airflow

  airflow-webserver:
    <<: *airflow-common
    image: data-inclusion/pipeline
    command: webserver
    restart: "no"
    ports:
      - "${AIRFLOW_UI_PORT:-8080}:8080"
    depends_on:
      <<: *airflow-common-depends-on
      airflow-init:
        condition: service_completed_successfully

  airflow-scheduler:
    <<: *airflow-common
    image: data-inclusion/pipeline
    # The scheduler service owns the image build; the other Airflow
    # services reuse the built data-inclusion/pipeline image.
    build:
      context: pipeline
    command: scheduler
    restart: "no"
    healthcheck:
      # $$ escapes Compose interpolation so ${HOSTNAME} is expanded
      # inside the container, not by docker compose.
      test:
        [
          "CMD-SHELL",
          'airflow jobs check --job-type SchedulerJob --hostname "$${HOSTNAME}"'
        ]
      interval: 10s
      timeout: 10s
      retries: 5
    depends_on:
      <<: *airflow-common-depends-on
      airflow-init:
        condition: service_completed_successfully
    env_file:
      - ./pipeline/defaults.env

  # One-shot service: migrates the metadata DB and creates the admin user,
  # then exits (other services wait on service_completed_successfully).
  airflow-init:
    <<: *airflow-common
    image: data-inclusion/pipeline
    command: airflow version
    environment:
      <<: *airflow-common-environment
      # Additional variables for development only
      _AIRFLOW_DB_MIGRATE: 'true'
      _AIRFLOW_WWW_USER_CREATE: 'true'
      _AIRFLOW_WWW_USER_USERNAME: airflow
      _AIRFLOW_WWW_USER_PASSWORD: airflow

  # S3-compatible object storage used as the local data lake.
  minio:
    image: minio/minio:RELEASE.2024-05-10T01-41-38Z
    command: server /data
    restart: "no"
    ports:
      - "9000:9000"
      - "9001:9001"
    environment:
      - MINIO_CONSOLE_ADDRESS=:9001
      - MINIO_ROOT_USER=minioadmin
      - MINIO_ROOT_PASSWORD=minioadmin
    volumes:
      - minio-data:/data

  # One-shot service: creates the data lake bucket in minio.
  minio-init:
    image: minio/mc:RELEASE.2024-05-09T17-04-24Z
    entrypoint: /bin/bash -c
    command:
      - |
        mc alias set tmp http://minio:9000 minioadmin minioadmin
        mc admin update --yes tmp
        mc mb --ignore-existing tmp/data-inclusion-lake
    depends_on:
      - minio

  # Target datawarehouse. Durability settings (fsync off, etc.) trade
  # crash-safety for speed — acceptable for development data only.
  target-db:
    build: datawarehouse
    restart: "no"
    command: -c fsync=off -c full_page_writes=off -c synchronous_commit=off -c log_statement=all
    healthcheck:
      test: ["CMD", "pg_isready", "-U", "data-inclusion"]
      interval: 5s
      retries: 5
    ports:
      - "${TARGET_POSTGRES_PORT:-5433}:5432"
    environment:
      - POSTGRES_DB=data-inclusion
      - POSTGRES_USER=data-inclusion
      - POSTGRES_PASSWORD=data-inclusion
    volumes:
      - pg-data:/var/lib/postgresql/data

  api:
    image: data-inclusion/api
    build: api
    depends_on:
      target-db:
        condition: service_healthy
    restart: "no"
    ports:
      - "${API_PORT:-8000}:8000"
    environment:
      - ENV=${API_ENV:-dev}
      - DEBUG=${API_DEBUG:-False}
      - DATABASE_URL=postgresql://data-inclusion:data-inclusion@target-db:5432/data-inclusion
      - BASE_URL=http://127.0.0.1:8000
      - SECRET_KEY=USE_IN_DEVELOPMENT_ONLY
      - DATALAKE_ENDPOINT_URL=http://minio:9000
      - DATALAKE_BUCKET_NAME=data-inclusion-lake
      - DATALAKE_SECRET_KEY=minioadmin
      - DATALAKE_ACCESS_KEY=minioadmin

volumes:
  pg-data:
  minio-data:

networks:
  default:
    name: data-inclusion