Skip to content

Commit

Permalink
Merge branch 'master' into feature/l3t3-format-benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
yuriihavrylko authored Feb 18, 2024
2 parents 15cbd16 + 7ff5d76 commit 10dd4b3
Show file tree
Hide file tree
Showing 44 changed files with 1,303 additions and 3 deletions.
3 changes: 3 additions & 0 deletions .dvc/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/config.local
/tmp
/cache
5 changes: 5 additions & 0 deletions .dvc/config
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# "minio" is the remote DVC uses when no remote is named explicitly.
[core]
remote = minio
# Remote "minio": an S3-style bucket (ml-data) reached through a custom endpoint URL.
['remote "minio"']
url = s3://ml-data
# NOTE(review): hard-coded private address, presumably the MinIO server; confirm it is reachable from every machine that runs dvc push/pull.
endpointurl = http://10.0.0.6:9000
3 changes: 3 additions & 0 deletions .dvcignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Add patterns of files dvc should ignore, which could improve
# the performance. Learn more at
# https://dvc.org/doc/user-guide/dvcignore
35 changes: 35 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ name: Deploy Image to Docker Hub

env:
APP_NAME: 'prjctr'
STREAMLIT_NAME: 'app-streamlit'
FASTAPI_NAME: 'app-fastapi'

on:
push:
Expand All @@ -11,7 +13,31 @@ on:
workflow_dispatch:

jobs:
# Runs the unit tests; the push-image job depends on this job succeeding.
tests:
  runs-on: ubuntu-latest
  steps:
    - name: 'Checkout GitHub Action'
      uses: actions/checkout@main

    - name: 'Set up Python'
      uses: actions/setup-python@v5
      with:
        # Same interpreter as the Docker base image (python:3.11-slim).
        # Python 3.8 cannot install pandas==2.1.4 from requirements-dev.txt.
        python-version: '3.11'

    - name: 'Install dependencies'
      run: |
        python -m pip install --upgrade pip
        pip install -r app/requirements-dev.txt
    - name: 'Run pytest'
      run: |
        cd app/
        pytest tests/
      env:
        # Lets the tests import packages relative to app/.
        PYTHONPATH: '.'

push-image:
needs: tests
runs-on: ubuntu-latest
steps:
- name: 'Checkout GitHub Action'
Expand All @@ -28,3 +54,12 @@ jobs:
run: |
docker build . --tag docker.io/${{ secrets.DH_USERNAME }}/$APP_NAME:latest
docker push docker.io/${{ secrets.DH_USERNAME }}/$APP_NAME:latest
- name: 'Build & Push Image streamlit'
  # --target selects the app-streamlit stage; without it docker builds the
  # last stage of the multi-stage Dockerfile and pushes that under this name.
  run: |
    docker build . --target app-streamlit --tag docker.io/${{ secrets.DH_USERNAME }}/$STREAMLIT_NAME:latest
    docker push docker.io/${{ secrets.DH_USERNAME }}/$STREAMLIT_NAME:latest
- name: 'Build & Push Image fastapi'
  # --target selects the app-fastapi stage; without it docker builds the
  # last stage of the multi-stage Dockerfile and pushes that under this name.
  run: |
    docker build . --target app-fastapi --tag docker.io/${{ secrets.DH_USERNAME }}/$FASTAPI_NAME:latest
    docker push docker.io/${{ secrets.DH_USERNAME }}/$FASTAPI_NAME:latest
30 changes: 28 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,12 +1,38 @@
FROM python:3.12-slim as builder
# Shared base stage: Python runtime plus the dependencies listed in app/requirements.txt.
FROM python:3.11-slim AS builder

WORKDIR /app

# Copy only the manifest so this layer stays cached until requirements.txt changes.
COPY app/requirements.txt .
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir -r requirements.txt

FROM builder
# Flask image: application sources layered on top of the shared dependency stage.
FROM builder AS app-flask

# Copies the contents of the local app/ directory into the current WORKDIR.
COPY app/ ./
# Documents the port gunicorn binds to below.
EXPOSE 8000
CMD ["gunicorn", "--bind", "0.0.0.0:8000", "app:app"]

# Streamlit UI image.
FROM builder AS app-streamlit

# The builder stage only installs dependencies; copy the sources so that
# src/serving/streamlit.py exists relative to the working directory.
COPY app .
# Documents the port passed to --server.port below.
EXPOSE 8080
# Exec form: streamlit runs as PID 1 and receives SIGTERM from `docker stop` directly.
CMD ["streamlit", "run", "--server.address", "0.0.0.0", "--server.port", "8080", "src/serving/streamlit.py"]


# FastAPI image served by uvicorn.
FROM builder AS app-fastapi

# The builder stage only installs dependencies; copy the sources so that the
# src.serving.fastapi module is importable from the working directory.
COPY app .
# Documents the port passed to --port below.
EXPOSE 8090
# Exec form: uvicorn runs as PID 1 and receives SIGTERM from `docker stop` directly.
CMD ["uvicorn", "--host", "0.0.0.0", "--port", "8090", "--workers", "4", "src.serving.fastapi:app"]

# Seldon Core image: wraps seldon.py as the model class served by seldon-core-microservice.
FROM builder AS app-seldon
EXPOSE 5000
EXPOSE 9000
# key=value form; the space-separated `ENV key value` syntax is deprecated.
# Both values are consumed by the CMD at the bottom of this stage.
ENV MODEL_NAME=SeldonAPI \
    SERVICE_TYPE=MODEL
# The file is renamed so that it matches MODEL_NAME.
COPY app/src/serving/seldon.py /app/SeldonAPI.py

# Single layer: hand /app to uid 8888 and create world-writable /.cache and /.config.
# NOTE(review): mode 777 is kept as in the original; presumably the container may
# run under an arbitrary uid. Tighten if the runtime uid is fixed.
RUN chown -R 8888 /app \
    && mkdir /.cache /.config \
    && chmod 777 /.cache /.config

# Shell form is required so $MODEL_NAME and $SERVICE_TYPE are expanded;
# `exec` replaces the shell so the microservice receives signals directly.
CMD exec seldon-core-microservice $MODEL_NAME --service-type $SERVICE_TYPE


# KServe image: starts the kserve.py script with the Python interpreter from the builder stage.
# NOTE(review): this stage has no COPY and the builder's WORKDIR is /app, so the relative
# path below resolves to /app/app/src/serving/kserve.py. Confirm the script is provided there.
FROM builder AS app-kserve
ENTRYPOINT ["python", "app/src/serving/kserve.py"]
201 changes: 201 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,131 @@ Works on push to master/feature*
![Alt text](assets/actions.png)


### Streamlit

Run:
```
streamlit run src/serving/streamlit.py
```

![Alt text](assets/streamlit.png)

Deploy k8s:
```
kubectl create -f deployment/app-ui.yml
kubectl port-forward --address 0.0.0.0 svc/app-ui 8080:8080
```

Deploy k8s:
```
kubectl create -f deployment/app-ui.yml
kubectl port-forward --address 0.0.0.0 svc/app-ui 8080:8080
```


### Fast API

Postman

![Alt text](assets/fastapi.png)



Deploy k8s:
```
kubectl create -f deployment/app-fasttext.yml
kubectl port-forward --address 0.0.0.0 svc/app-fasttext 8090:8090
```

### Seldon

Installation

```
kubectl apply -f https://github.com/datawire/ambassador-operator/releases/latest/download/ambassador-operator-crds.yaml
kubectl apply -n ambassador -f https://github.com/datawire/ambassador-operator/releases/latest/download/ambassador-operator-kind.yaml
kubectl wait --timeout=180s -n ambassador --for=condition=deployed ambassadorinstallations/ambassador
kubectl create namespace seldon-system
helm install seldon-core seldon-core-operator --version 1.15.1 --repo https://storage.googleapis.com/seldon-charts --set usageMetrics.enabled=true --set ambassador.enabled=true --namespace seldon-system
```

Deploy k8s:
```
kubectl create -f deployment/seldon-custom.yaml
```

### Kserve

Deploy k8s:

```
kubectl create -f deployment/kserve.yaml
kubectl get inferenceservice custom-model
```


### Load testing

![Alt text](assets/locust.png)

```
locust -f benchmarks/load_test.py --host=http://localhost:9933 --users 50 --spawn-rate 10 --autostart --run-time 600s
```

### DVC
Install DVC
```
brew install dvc
```
Init in repo
```
dvc init --subdir
git status
git commit -m "init DVC"
```
Move the data file into place, add it to DVC, and commit the DVC data config
```
dvc add ./data/data.csv
git add data/.gitignore data/data.csv.dvc
git commit -m "create data"
```
Add remote data storage and push DVC remote config
(ensure that bucket already created)
```
dvc remote add -d minio s3://ml-data
dvc remote modify minio endpointurl $AWS_ENDPOINT  # e.g. http://10.0.0.6:9000

git add .dvc/config
git commit -m "configure remote"
git push
```
Upload data
```
export AWS_ACCESS_KEY_ID='...'
export AWS_SECRET_ACCESS_KEY='...'
dvc push
```

### Label studio

```
docker pull heartexlabs/label-studio:latest
docker run -it -p 8080:8080 -v `pwd`/mydata:/label-studio/data heartexlabs/label-studio:latest
```

![Alt text](assets/labeling.png)


### Minio setup
Mac/Local
```
Expand Down Expand Up @@ -78,3 +203,79 @@ JSON format demonstrates faster write times but slower read times compared to ot
PARQUET format showcases the fastest write times and relatively fast read times, with a smaller file size after write compared to CSV and JSON.

ORC format exhibits moderate write times and the smallest file size after write among the tested formats, with efficient read times.

### POD autoscaling

Install the metrics server

```
kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml
kubectl patch -n kube-system deployment metrics-server --type=json -p '[{"op":"add","path":"/spec/template/spec/containers/0/args/-","value":"--kubelet-insecure-tls"}]'
```

Run from config

```
kubectl create -f deployment/app-fastapi-scaling.yml
```


### Model optimization

Run pruning:

```
python -m src.model.pruning
```

Run distillation:

```
python -m src.model.distilation
```

### Kafka

Install kafka
```
helm repo add bitnami https://charts.bitnami.com/bitnami
helm install zookeeper bitnami/zookeeper --set replicaCount=1 --set auth.enabled=false --set allowAnonymousLogin=true --set persistence.enabled=false --version 11.0.0
helm install kafka bitnami/kafka --set zookeeper.enabled=false --set replicaCount=1 --set persistence.enabled=false
# eventing
kubectl apply -f https://github.com/knative/eventing/releases/download/knative-v1.9.7/eventing-crds.yaml
kubectl apply -f https://github.com/knative/eventing/releases/download/knative-v1.9.7/eventing-core.yaml
kubectl apply -f https://github.com/knative-sandbox/eventing-kafka/releases/download/knative-v1.9.1/source.yaml
```

Run deployment

```
kubectl apply -f deployment/kafka-infra.yml
kubectl port-forward $(kubectl get pod --selector="app=minio" --output jsonpath='{.items[0].metadata.name}') 9000:9000
mc config host add myminio http://127.0.0.1:9000 miniominio miniominio
mc mb myminio/input
mc mb myminio/output
mc admin config set myminio notify_kafka:1 tls_skip_verify="off" queue_dir="" queue_limit="0" sasl="off" sasl_password="" sasl_username="" tls_client_auth="0" tls="off" client_tls_cert="" client_tls_key="" brokers="kafka-headless.default.svc.cluster.local:9092" topic="test" version=""
mc admin service restart myminio
mc event add myminio/input arn:minio:sqs::1:kafka -p --event put --suffix .json
kubectl create -f deployment/kafka-infra.yml
```

### Data drift detection

```
python -m src.monitoring.drift
```

![Alt text](assets/drift.png)
Empty file added app/__init__.py
Empty file.
13 changes: 13 additions & 0 deletions app/requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,16 @@ pytest==7.4.4
pytest-mock==3.12.0
datasets==2.16.1
pandas==2.1.4
evaluate==0.4.1
great-expectations==0.18.7
pytest==7.4.4
scikit-learn==1.3.2
accelerate==0.25.0
datasets==2.16.1
wandb==0.16.1
httpx==0.23.0
locust==2.20.1
textpruner==1.1.post2
evidently==0.4.13
sentence_transformers==2.2.2
ipykernel==6.28.0
7 changes: 6 additions & 1 deletion app/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,9 @@ flask==3.0.0
gunicorn==21.2.0
boto3==1.34.12
botocore==1.34.12

transformers==4.36.2
streamlit==1.29.0
fastapi>=0.95.0
uvicorn>=0.22.0
# kserve==0.11.2
protobuf==3.20.1
Empty file added app/src/__init__.py
Empty file.
Empty file added app/src/helpers/__init__.py
Empty file.
15 changes: 15 additions & 0 deletions app/src/helpers/wandb_registry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from pathlib import Path
import wandb

def publish_model(model_path: str, project: str, name: str, model_type: str = "model"):
    """Log the directory at ``model_path`` to W&B as an artifact.

    Args:
        model_path: Directory whose contents are added to the artifact.
        project: W&B project the run is created in.
        name: Name given to the artifact.
        model_type: Artifact type, "model" by default.
    """
    with wandb.init(project=project, job_type="model-publishing") as wandb_run:
        model_artifact = wandb.Artifact(name, type=model_type)
        model_artifact.add_dir(model_path)
        wandb_run.log_artifact(model_artifact)
        print(f"Published {name} to W&B")

def download_model(model_name: str, project: str, download_path: Path, model_type: str = "model"):
    """Fetch a W&B artifact into ``download_path``.

    Args:
        model_name: Artifact identifier handed to ``use_artifact``.
        project: W&B project the run is created in.
        download_path: Root directory passed to the artifact download.
        model_type: Artifact type, "model" by default.
    """
    with wandb.init(project=project) as wandb_run:
        model_artifact = wandb_run.use_artifact(model_name, type=model_type)
        local_dir = model_artifact.download(root=download_path)
        print(f"Downloaded {model_name} to {local_dir}")
Loading

0 comments on commit 10dd4b3

Please sign in to comment.