-
Notifications
You must be signed in to change notification settings - Fork 191
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
130 changed files
with
5,616 additions
and
1,001 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,3 +9,6 @@ omit = | |
|
||
# avoid measuring code of unittest | ||
tests/* | ||
|
||
[report] | ||
ignore_errors = True |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
# This workflow starts the docker compose environment, installs data-juicer in it, and runs the performance benchmark; it is triggered manually or on pushes to main | ||
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python | ||
|
||
name: performance_benchmark | ||
|
||
on: | ||
workflow_dispatch: | ||
push: | ||
branches: | ||
- main | ||
|
||
permissions: | ||
contents: read | ||
|
||
env: | ||
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true | ||
|
||
jobs: | ||
perf_bench: | ||
runs-on: [GPU, unittest] | ||
environment: Testing | ||
steps: | ||
- uses: actions/checkout@v3 | ||
with: | ||
path: dj-${{ github.run_id }} | ||
|
||
- name: Setup docker compose | ||
working-directory: dj-${{ github.run_id }}/.github/workflows/docker | ||
run: | | ||
docker compose up -d | ||
- name: Install data-juicer | ||
working-directory: dj-${{ github.run_id }}/.github/workflows/docker | ||
run: | | ||
docker compose exec ray-head pip install -e .\[all\] | ||
- name: Clean dataset cache | ||
working-directory: dj-${{ github.run_id }}/.github/workflows/docker | ||
run: | | ||
docker compose exec ray-head rm -rf /data/huggingface/dataset | ||
- name: Run performance benchmark standalone | ||
working-directory: dj-${{ github.run_id }}/.github/workflows/docker | ||
run: | | ||
docker compose exec ray-head bash tests/benchmark_performance/run.sh ${{ secrets.INTERNAL_WANDB_URL }} ${{ secrets.INTERNAL_WANDB_API_KEY }} | ||
- name: Remove docker compose | ||
working-directory: dj-${{ github.run_id }}/.github/workflows/docker | ||
if: always() | ||
run: | | ||
docker compose down --remove-orphans | ||
- name: Cleanup workspace | ||
if: always() | ||
run: | | ||
rm -rf dj-${{ github.run_id }} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,32 +1,39 @@ | ||
# The data-juicer image includes all open-source contents of data-juicer, | ||
# and it will be installed in editable mode. | ||
|
||
FROM python:3.8.18 | ||
FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 | ||
|
||
# install python 3.10 | ||
RUN apt-get update \ | ||
&& apt-get install -y git curl vim wget python3.10 libpython3.10-dev python3-pip \ | ||
&& apt-get install -y libgl1-mesa-glx libglib2.0-0 \ | ||
&& ln -sf /usr/bin/python3.10 /usr/bin/python3 \ | ||
&& ln -sf /usr/bin/python3.10 /usr/bin/python \ | ||
&& apt-get autoclean && rm -rf /var/lib/apt/lists/* \ | ||
&& pip install --upgrade pip | ||
|
||
# install 3rd-party system dependencies | ||
RUN apt-get update \ | ||
&& apt-get install ffmpeg libsm6 libxext6 software-properties-common build-essential cmake gfortran libopenblas-dev liblapack-dev -y | ||
|
||
# prepare the java env | ||
WORKDIR /opt | ||
# download jdk | ||
RUN wget https://aka.ms/download-jdk/microsoft-jdk-17.0.9-linux-x64.tar.gz -O jdk.tar.gz && \ | ||
tar -xzf jdk.tar.gz && \ | ||
rm -rf jdk.tar.gz && \ | ||
mv jdk-17.0.9+8 jdk | ||
RUN wget https://aka.ms/download-jdk/microsoft-jdk-17.0.9-linux-x64.tar.gz -O jdk.tar.gz \ | ||
&& tar -xzf jdk.tar.gz \ | ||
&& rm -rf jdk.tar.gz \ | ||
&& mv jdk-17.0.9+8 jdk | ||
|
||
# set the environment variable | ||
ENV JAVA_HOME=/opt/jdk | ||
|
||
WORKDIR /data-juicer | ||
|
||
# install requirements which need to be installed from source | ||
RUN pip install git+https://github.com/xinyu1205/recognize-anything.git --default-timeout 1000 | ||
|
||
# install requirements first to better reuse installed library cache | ||
COPY environments/ environments/ | ||
RUN cat environments/* | xargs pip install --default-timeout 1000 | ||
RUN pip install --upgrade setuptools==69.5.1 setuptools_scm \ | ||
&& pip install git+https://github.com/xinyu1205/recognize-anything.git --default-timeout 1000 | ||
|
||
# install data-juicer then | ||
COPY . . | ||
RUN pip install -v -e .[all] | ||
RUN pip install -v -e .[sandbox] | ||
|
||
# install 3rd-party system dependencies | ||
RUN apt-get update && apt-get install ffmpeg libsm6 libxext6 -y | ||
RUN pip install -v -e .[all] --default-timeout 1000 | ||
RUN pip install -v -e .[sandbox] --default-timeout 1000 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.