/
docker-compose.yml
340 lines (323 loc) · 11.1 KB
/
docker-compose.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
# Compose file format version. NOTE(review): recent Docker Compose releases
# treat this key as informational only; it is kept for older tooling.
version: '3.8'
# Every container of the stack is declared under this key.
services:
# dev workspace: notebook container built from ./services/jupyter
jupyter:
  # platform: linux/x86_64 # for others (mostly) or can just remove this line
  platform: linux/arm64 # for Mac M1
  container_name: jupyter
  build:
    context: ./services/jupyter
    dockerfile: Dockerfile
    args:
      NB_USER: ${JUPYTER_USER}
      # Notebook password baked in at build time. Set JUPYTER_PASSWORD (shell
      # or .env) to override; the fallback keeps the previous hard-coded value
      # so existing setups build unchanged.
      NB_PWD: "${JUPYTER_PASSWORD:-123456789}"
      # Quoted so the build arg is always passed as a string.
      NB_UID: "1412"
      CONDA_DIR: /opt/anaconda3
      ARCH: aarch64 # aarch64 for Mac M1 | x86_64 for others (mostly)
      JUPYTER_PORT: ${JUPYTER_PORT}
      MLFLOW_ARTIFACT_ROOT: ${MLFLOW_ARTIFACT_ROOT}
      CENTRAL_STORAGE_PATH: /home/${JUPYTER_USER}/central_storage
      MAIN_CONDA_ENV_NAME: computer-viz-dl
  env_file:
    - .env
  # Entries here take precedence over same-named variables from env_file.
  environment:
    # Other services are addressed by their Compose service name.
    - MLFLOW_TRACKING_URI=http://mlflow:${MLFLOW_PORT}
    - PREFECT_API_URL=http://prefect:${PREFECT_PORT}/api
    - CENTRAL_STORAGE_PATH=/home/${JUPYTER_USER}/central_storage
    # NOTE(review): credentials are embedded in this URL; presumably they must
    # match what ./services/postgres/docker_postgres_init.sql creates - confirm
    # before changing them.
    - DB_CONNECTION_URL=postgresql://dlservice_user:SuperSecurePwdHere@postgres:${POSTGRES_PORT}/dlservice_pg_db
    - DB_PREDICTION_TABLE_NAME=predictions
    - DB_API_LOG_TABLE_NAME=api_log
  networks:
    - mlops_network
  ports:
    - "${JUPYTER_PORT}:${JUPYTER_PORT}"
  volumes:
    # The whole repository is bind-mounted as the notebook workspace.
    - ./:/home/${JUPYTER_USER}/workspace/
    - mlflow_data:${MLFLOW_ARTIFACT_ROOT}
    - central_storage:/home/${JUPYTER_USER}/central_storage
    - evidently_data:/home/${JUPYTER_USER}/workspace/deployments/evidently_workspaces
  # Short form: only start order is enforced, not readiness.
  depends_on:
    - mlflow
    - prefect
  # # Uncomment the block below if you have CUDA-enabled GPUs
  # deploy:
  #   resources:
  #     reservations:
  #       devices:
  #         - driver: nvidia
  #           device_ids: ['all']
  #           capabilities: [gpu]
# ML platform / experiment tracking
mlflow:
  container_name: mlflow
  platform: linux/arm64
  restart: always
  build:
    context: ./services/mlflow
    dockerfile: Dockerfile
    args:
      MLFLOW_PORT: '${MLFLOW_PORT}'
  env_file:
    - .env
  environment:
    # Points at the postgres service on the shared network.
    # NOTE(review): presumably read by the image's start command - confirm in
    # ./services/mlflow.
    BACKEND_STORE_URI: 'postgresql://mlflow_user:SuperSecurePwdHere@postgres:${POSTGRES_PORT}/mlflow_pg_db'
  networks:
    - mlops_network
  ports:
    # Same port number on the host and inside the container.
    - '${MLFLOW_PORT}:${MLFLOW_PORT}'
  volumes:
    # Named volume mounted at the artifact root path.
    - 'mlflow_data:${MLFLOW_ARTIFACT_ROOT}'
  depends_on:
    # Wait until the postgres healthcheck passes before starting.
    postgres:
      condition: service_healthy
# orchestrator
prefect:
  platform: linux/arm64
  container_name: prefect
  restart: always
  build:
    context: ./services/prefect
    dockerfile: Dockerfile
    args:
      PREFECT_PORT: ${PREFECT_PORT}
  env_file:
    - .env
  environment:
    # Loopback address: this URL is used from inside the container itself
    # (the healthcheck below reads it).
    - PREFECT_API_URL=http://127.0.0.1:${PREFECT_PORT}/api
    - PREFECT_API_DATABASE_CONNECTION_URL=postgresql+asyncpg://prefect_user:SuperSecurePwdHere@postgres:${POSTGRES_PORT}/prefect_pg_db
  networks:
    - mlops_network
  ports:
    - "${PREFECT_PORT}:${PREFECT_PORT}"
  volumes:
    - prefect_data:${PREFECT_LOCAL_STORAGE_PATH}
  depends_on:
    postgres:
      condition: service_healthy
  healthcheck:
    # `$$` escapes the dollar sign so Compose does not interpolate the
    # variable itself; the container's shell expands PREFECT_API_URL when the
    # check runs.
    # `-f` makes curl exit non-zero on HTTP error statuses (>= 400). Without
    # it, any HTTP response - including a 500 - counted as healthy.
    # `-sS` silences the progress meter but still prints errors.
    test: ["CMD-SHELL", "curl -fsS $${PREFECT_API_URL}/health || exit 1"]
    interval: 5s
    timeout: 5s
    retries: 5
# worker / agent / automation
prefect_worker:
  container_name: prefect_worker
  platform: linux/arm64
  build:
    context: ./services/prefect_worker
    dockerfile: Dockerfile
  env_file:
    - .env
  environment:
    # Peer services are reached through their Compose service names.
    PREFECT_API_URL: 'http://prefect:${PREFECT_PORT}/api'
    EVIDENTLY_URL: 'http://evidently:${EVIDENTLY_PORT}'
    PREFECT_API_DATABASE_CONNECTION_URL: 'postgresql+asyncpg://prefect_user:SuperSecurePwdHere@postgres:${POSTGRES_PORT}/prefect_pg_db'
    DB_CONNECTION_URL: 'postgresql://dlservice_user:SuperSecurePwdHere@postgres:${POSTGRES_PORT}/dlservice_pg_db'
    # Matches the mount point of the central_storage volume below.
    CENTRAL_STORAGE_PATH: /service/central_storage
  networks:
    - mlops_network
  volumes:
    - central_storage:/service/central_storage
  depends_on:
    # Start only after the prefect service reports healthy.
    prefect:
      condition: service_healthy
# model deployment / model service
dl_service:
  platform: linux/arm64
  container_name: dl_service
  restart: always
  build:
    context: ./services/dl_service
    dockerfile: Dockerfile
    args:
      DL_SERVICE_PORT: ${DL_SERVICE_PORT}
  env_file:
    - .env
  environment:
    # Matches the mount point of the central_storage volume below.
    - CENTRAL_STORAGE_PATH=/service/central_storage
    - DB_CONNECTION_URL=postgresql://dlservice_user:SuperSecurePwdHere@postgres:${POSTGRES_PORT}/dlservice_pg_db
  networks:
    - mlops_network
  # No host ports are published; the service is reachable only on the network.
  volumes:
    - central_storage:/service/central_storage
    # Application source is bind-mounted over /service/app/.
    - ./services/dl_service/app/:/service/app/
  depends_on:
    postgres:
      condition: service_healthy
  healthcheck:
    # `$$` defers expansion of DL_SERVICE_PORT to the container's shell.
    # `-f` makes curl exit non-zero on HTTP error statuses (>= 400), so an
    # erroring endpoint no longer counts as healthy.
    # NOTE(review): assumes the app answers /health_check with a 2xx status -
    # confirm against ./services/dl_service/app.
    test: ["CMD-SHELL", "curl -fsS http://127.0.0.1:$${DL_SERVICE_PORT}/health_check || exit 1"]
    interval: 5s
    timeout: 5s
    retries: 5
# ui for model service
web_ui:
  container_name: web_ui
  platform: linux/arm64
  build:
    context: ./services/web_ui
    dockerfile: Dockerfile
    args:
      WEB_UI_PORT: '${WEB_UI_PORT}'
  env_file:
    - .env
  environment:
    # Prediction requests are sent to the nginx service, not to dl_service.
    PREDICT_ENDPOINT: 'http://nginx:${NGINX_PORT}/predict/'
  networks:
    - mlops_network
  ports:
    # Same port number on the host and inside the container.
    - '${WEB_UI_PORT}:${WEB_UI_PORT}'
  volumes:
    # Application source is bind-mounted over /service/app/.
    - ./services/web_ui/app/:/service/app/
  depends_on:
    # Start only after dl_service reports healthy.
    dl_service:
      condition: service_healthy
# reverse proxy
nginx:
  container_name: nginx
  platform: linux/arm64
  restart: always
  build:
    context: ./services/nginx
    dockerfile: Dockerfile
  env_file:
    - .env
  # No host ports are published; the proxy is reachable only on this network.
  networks:
    - mlops_network
  depends_on:
    # Start only after dl_service reports healthy.
    dl_service:
      condition: service_healthy
# model monitoring
evidently:
  container_name: evidently
  platform: linux/arm64
  build:
    context: ./services/evidently
    dockerfile: Dockerfile
    args:
      EVIDENTLY_PORT: '${EVIDENTLY_PORT}'
  env_file:
    - .env
  networks:
    - mlops_network
  ports:
    # Same port number on the host and inside the container.
    - '${EVIDENTLY_PORT}:${EVIDENTLY_PORT}'
  volumes:
    # Named volume mounted at the workspace directory under /service.
    - 'evidently_data:/service/${EVIDENTLY_WORKSPACE_NAME}'
# overall monitoring & dashboards
grafana:
  # NOTE(review): `latest` is a floating tag; consider pinning a version.
  image: grafana/grafana-oss:latest
  container_name: grafana
  platform: linux/arm64
  restart: unless-stopped
  networks:
    - backend_network
  ports:
    # Host port comes from GRAFANA_PORT; the container side is fixed at 3000.
    - '${GRAFANA_PORT}:3000'
  volumes:
    # Provisioning files, mounted read-only.
    - ./services/grafana/grafana_datasources.yml:/etc/grafana/provisioning/datasources/grafana_datasources.yml:ro
    - ./services/grafana/grafana_dashboards.yml:/etc/grafana/provisioning/dashboards/grafana_dashboards.yml:ro
    # Dashboard directory (writable bind mount).
    - ./services/grafana/dashboards:/opt/grafana/dashboards
    # Named volume mounted at /var/lib/grafana.
    - grafana_data:/var/lib/grafana
  depends_on:
    # Long form of the plain list entry: start order only, no health gate.
    prometheus:
      condition: service_started
# time-series database
prometheus:
  # NOTE(review): `latest` is a floating tag; consider pinning a version.
  image: prom/prometheus:latest
  container_name: prometheus
  platform: linux/arm64
  restart: unless-stopped
  # The only argument passed: the path of the config file mounted below.
  command:
    - '--config.file=/etc/prometheus/prometheus.yaml'
  networks:
    - backend_network
  ports:
    # Host port comes from PROMETHEUS_PORT; the container side is fixed at 9090.
    - '${PROMETHEUS_PORT}:9090'
  volumes:
    # Config file, mounted read-only.
    - ./services/prometheus/prometheus.yaml:/etc/prometheus/prometheus.yaml:ro
    # Named volume mounted at /prometheus.
    - prometheus_data:/prometheus
# host machine's metrics exporter for prometheus
node_exporter:
  image: quay.io/prometheus/node-exporter:v1.5.0
  container_name: node_exporter
  platform: linux/arm64
  restart: unless-stopped
  # Share the host's PID namespace.
  pid: host
  # Tell the exporter that the host filesystem is mounted at /host.
  command:
    - '--path.rootfs=/host'
  networks:
    - backend_network
  volumes:
    # Host root filesystem, read-only, with rslave mount propagation.
    - /:/host:ro,rslave
# cadvisor
cadvisor:
  image: gcr.io/cadvisor/cadvisor:v0.47.0
  container_name: cadvisor
  restart: unless-stopped
  # Runs with extended privileges and direct access to /dev/kmsg.
  privileged: true
  devices:
    - /dev/kmsg
  networks:
    - backend_network
  ports:
    # Host port comes from CADVISOR_PORT; the container side is fixed at 8080.
    - '${CADVISOR_PORT}:8080'
  volumes:
    # All host paths below are mounted read-only.
    - /:/rootfs:ro
    - /var/run:/var/run:ro
    - /sys:/sys:ro
    - /var/lib/docker/:/var/lib/docker:ro
    - /dev/disk/:/dev/disk:ro
    # The docker.sock mount is needed to make it work on Mac M1.
    - /var/run/docker.sock:/var/run/docker.sock:ro
# sql database
postgres:
  platform: linux/arm64
  container_name: postgres
  image: postgres:15.3
  restart: always
  environment:
    - POSTGRES_USER=postgres
    # Superuser password. Set POSTGRES_PASSWORD (shell or .env) to override;
    # the fallback keeps the previous hard-coded value.
    # NOTE(review): the image presumably applies this only when the data
    # volume is first initialised - confirm before rotating it.
    - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-postgres}
    - POSTGRES_DB=postgres
  networks:
    - mlops_network
  # NOTE(review): other services connect to postgres:${POSTGRES_PORT}, but
  # nothing in this block changes the server's listen port - confirm that
  # POSTGRES_PORT matches the port this container actually listens on.
  volumes:
    # Init script placed in the image's docker-entrypoint-initdb.d directory.
    - ./services/postgres/docker_postgres_init.sql:/docker-entrypoint-initdb.d/docker_postgres_init.sql
    # Named volume mounted at the database data directory.
    - pgdata:/var/lib/postgresql/data
  healthcheck:
    # Services that declare `condition: service_healthy` on postgres wait for
    # this probe to pass.
    test: ["CMD-SHELL", "pg_isready -U postgres"]
    interval: 5s
    timeout: 5s
    retries: 5
# ui for database
pgadmin:
  platform: linux/arm64
  container_name: pgadmin
  # NOTE(review): no tag is given, so the default tag is pulled; consider
  # pinning a version.
  image: dpage/pgadmin4
  restart: always
  environment:
    # Login credentials for the pgAdmin UI. Both can be overridden from the
    # shell or .env; the fallbacks keep the previous hard-coded values.
    - PGADMIN_DEFAULT_EMAIL=${PGADMIN_DEFAULT_EMAIL:-pgadmin@gmail.com}
    - PGADMIN_DEFAULT_PASSWORD=${PGADMIN_DEFAULT_PASSWORD:-SuperSecurePwdHere}
  networks:
    - mlops_network
  ports:
    # Host port 16543 maps to container port 80.
    - "16543:80"
  volumes:
    - pgadmin_data:/var/lib/pgadmin
  depends_on:
    # Start only after postgres reports healthy.
    postgres:
      condition: service_healthy
networks:
  # Joined by jupyter, mlflow, prefect, prefect_worker, dl_service, web_ui,
  # nginx, evidently, postgres and pgadmin.
  mlops_network:
    driver: bridge
  # Joined by grafana, prometheus, node_exporter and cadvisor.
  backend_network:
    driver: bridge
# Named volumes, all with default settings.
volumes:
  # Mounted by jupyter and mlflow.
  mlflow_data: {}
  # Mounted by prefect.
  prefect_data: {}
  # Mounted by pgadmin.
  pgadmin_data: {}
  # Mounted by grafana.
  grafana_data: {}
  # Mounted by prometheus.
  prometheus_data: {}
  # Mounted by postgres.
  pgdata: {}
  # Mounted by jupyter and evidently.
  evidently_data: {}
  # Mounted by jupyter, prefect_worker and dl_service.
  central_storage: {}