Merge branch 'release/v0.1.0' into main

S-aiueo32 · May 7, 2021 · c4a3b03 · c4a3b03
2 parents bb157af + 4a53a0b
commit c4a3b03
Show file tree

Hide file tree

Showing 16 changed files with 725 additions and 267 deletions.
diff --git a/.dockerignore b/.dockerignore
@@ -0,0 +1,23 @@
+# Python bytecode and compiled extension artifacts
+__pycache__
+*.pyc
+*.pyo
+*.pyd
+
+# Virtual environments
+.Python
+env
+.venv
+
+# pip artifacts
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Test and coverage outputs
+.tox
+.cache
+.pytest_cache
+.coverage
+.coverage.*
+coverage.xml
+nosetests.xml
+*,cover
+
+# Logs and version-control metadata
+*.log
+.git
+
+# Notebooks and examples are kept out of the build context
+.ipynb_checkpoints
+notebooks
+examples
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -0,0 +1,52 @@
+# Builds the `dev` stage of the Docker image on every push, then runs pytest
+# inside that image. The image is passed from the `build` job to the `test`
+# job as a `docker save` tarball stored in the Actions cache under a key
+# derived from the commit SHA.
+name: Build
+
+on:
+  push:
+
+env:
+  DOCKER_BUILDKIT: 1
+  IMAGE_NAME: hiraishin
+  DOCKER_FILE_PATH: Dockerfile
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+
+      # Declared before the build so that, on a cache miss, the tarball written
+      # by "Save image" is stored under this key when the job finishes.
+      - name: Cache image
+        uses: actions/cache@v2
+        with:
+          path: /tmp/docker-save
+          key: docker-save-${{ github.sha }}
+
+      - name: Build image
+        run: docker build --target dev -f "${DOCKER_FILE_PATH}" -t "${IMAGE_NAME}" .
+
+      # Each command runs on its own line so that a failing `docker tag` or
+      # `docker save` fails the step (the default shell is `bash -e`). Only the
+      # informational `ls` is best-effort.
+      - name: Save image
+        run: |
+          mkdir -p /tmp/docker-save
+          docker tag "${IMAGE_NAME}" "${IMAGE_NAME}-cache"
+          docker save "${IMAGE_NAME}-cache" -o /tmp/docker-save/snapshot.tar
+          ls -lh /tmp/docker-save || true
+
+  test:
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+    needs: build
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+
+      # Same path and key as in the `build` job, so this restores its tarball.
+      - name: Cache docker image
+        uses: actions/cache@v2
+        with:
+          path: /tmp/docker-save
+          key: docker-save-${{ github.sha }}
+
+      - name: Load docker image
+        run: docker load -i /tmp/docker-save/snapshot.tar
+
+      # The bind mount exposes /app/tests/test-reports from the container in
+      # the workspace on the runner.
+      - name: Run pytest
+        run: >-
+          docker run --rm
+          -v "$(pwd)/tests/test-reports:/app/tests/test-reports"
+          "${IMAGE_NAME}-cache"
+          poetry run pytest
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
@@ -0,0 +1,37 @@
+# Builds the distribution with Poetry on every push and uploads it to
+# Test PyPI. Pushes of a tag are additionally uploaded to PyPI.
+name: Publish to PyPI
+
+on:
+  push:
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+
+      - name: Set up Python
+        uses: actions/setup-python@v1
+        with:
+          python-version: "3.8"
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install poetry
+
+      - name: Build package
+        run: poetry build
+
+      # Guarded by the tag check: only tag pushes are released to PyPI.
+      - name: Publish distribution to PyPI
+        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          user: __token__
+          password: ${{ secrets.PYPI_PASSWORD }}
+
+      # Unconditional: runs for every push. NOTE(review): `skip_existing` is
+      # presumably set so that re-uploading an unchanged version does not fail
+      # the job; confirm against the action's documentation.
+      - name: Publish distribution to Test PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          user: __token__
+          password: ${{ secrets.TESTPYPI_PASSWORD }}
+          repository_url: https://test.pypi.org/legacy/
+          skip_existing: true
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,54 @@
+# Multi-stage build:
+#   base    - Python 3.8 runtime, shared environment variables, OS packages
+#   builder - installs Poetry and the non-dev dependencies into $VENV_PATH
+#   dev     - reuses the builder's Poetry and virtualenv, adds the dev
+#             dependencies and copies the project into /app
+
+# base image
+FROM python:3.8-slim AS base
+
+ENV TZ=Asia/Tokyo \
+    DEBIAN_FRONTEND=noninteractive \
+    # python
+    PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    # pip
+    PIP_NO_CACHE_DIR=off \
+    PIP_DISABLE_PIP_VERSION_CHECK=on \
+    PIP_DEFAULT_TIMEOUT=100 \
+    # poetry
+    POETRY_VERSION="1.1.5" \
+    POETRY_HOME="/opt/poetry" \
+    POETRY_VIRTUALENVS_IN_PROJECT=true \
+    POETRY_NO_INTERACTION=1 \
+    # path
+    PYSETUP_PATH="/opt/pysetup" \
+    VENV_PATH="/opt/pysetup/.venv" \
+    MY_MODULE_PATH="/app/src"
+
+# Poetry's launcher and the project virtualenv take precedence over the
+# interpreter shipped with the base image.
+ENV PATH="$POETRY_HOME/bin:$VENV_PATH/bin:$PATH"
+
+RUN apt-get update && \
+    apt-get install --no-install-recommends -y curl unzip tzdata && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+
+# builder image
+FROM base AS builder
+
+# Download the installer to a file rather than piping curl into python: with
+# the default `sh -c` shell a pipeline reports only the last command's exit
+# status, so a failed or truncated download would go unnoticed. `-f` makes
+# curl fail on HTTP errors.
+RUN curl -fsSL "https://raw.githubusercontent.com/python-poetry/poetry/${POETRY_VERSION}/get-poetry.py" -o /tmp/get-poetry.py && \
+    python /tmp/get-poetry.py && \
+    rm /tmp/get-poetry.py
+
+WORKDIR $PYSETUP_PATH
+
+# Only the dependency manifests are copied, so this layer is rebuilt only
+# when they change.
+COPY poetry.lock pyproject.toml ./
+RUN poetry install --no-dev
+
+# Put $MY_MODULE_PATH on the import path through a .pth file in site-packages.
+# The directory comes from sysconfig instead of grepping the repr of sys.path.
+RUN echo "$MY_MODULE_PATH" > "$(python -c 'import sysconfig; print(sysconfig.get_path("purelib"))')/my_module.pth"
+
+
+# dev image
+FROM base AS dev
+
+COPY --from=builder $POETRY_HOME $POETRY_HOME
+COPY --from=builder $PYSETUP_PATH $PYSETUP_PATH
+
+# Second install without --no-dev adds the development dependencies on top of
+# the environment copied from the builder stage.
+WORKDIR $PYSETUP_PATH
+RUN poetry install
+
+WORKDIR /app
+COPY . .
diff --git a/README.md b/README.md
@@ -1 +1,141 @@
-# hiraishin
+# Hiraishin
+A thin PyTorch-Lightning wrapper for building configuration-based DL pipelines with Hydra.
+
+# Dependencies
+- PyTorch Lightning
+- Hydra
+- Pydantic
+- etc.
+
+# Installation
+
+```shell
+$ pip install -U hiraishin
+```
+
+# Basic workflow
+## 1. Model initialization with type annotations
+Define a model class that has training components of PyTorch as instance variables.
+
+```python
+import torch.nn as nn
+import torch.optim as optim
+from omegaconf import DictConfig
+
+from hiraishin.models import BaseModel
+
+
+class ToyModel(BaseModel):
+
+    net: nn.Linear
+    criterion: nn.CrossEntropyLoss
+    optimizer: optim.Adam
+    scheduler: optim.lr_scheduler.ExponentialLR
+
+    def __init__(self, config: DictConfig) -> None:
+        self.initialize(config)  # call `initialize()` instead of `super().__init__()`
+```
+
+We recommend giving the instance variables one of the following prefixes to indicate their role.
+
+- `net` for networks. It must be a subclass of `nn.Module` so that its weights can be initialized and loaded.
+- `criterion` for loss functions.
+- `optimizer` for optimizers. It must be a subclass of `Optimizer`.
+- `scheduler` for schedulers. It must be a subclass of `_LRScheduler`, and its suffix must match that of the corresponding optimizer.
+
+If you need to define modules besides the above components (e.g. tokenizers), feel free to define them. The modules will be defined with the names you specify.
+
+## 2. Generating configuration file
+Hiraishin has the functionality to generate configuration files on the command line.
+If the above class is defined in `model.py` in the current directory, you can generate its configuration file with the following command.
+
+```shell
+$ hiraishin configen model.ToyModel
+The config has been generated! --> config/model/toy.yaml
+```
+
+Let's take a look at the generated file.
+Positional arguments are filled with `???`, which marks a mandatory parameter in Hydra.
+We recommend overwriting them with default values; otherwise, you must pass them as command-line arguments on every run.
+
+```yaml
+_target_: model.ToyModel
+_recursive_: false
+config:
+  networks:
+  - name: net
+    args:
+      _target_: torch.nn.Linear
+      _recursive_: true
+      in_features: ???  # -> 1
+      out_features: ???  # -> 1
+    init:
+      weight_path: null
+      init_type: null
+      init_gain: null
+  losses:
+  - name: criterion
+    args:
+      _target_: torch.nn.CrossEntropyLoss
+      _recursive_: true
+    weight: 1.0
+  optimizers:
+  - name: optimizer
+    args:
+      _target_: torch.optim.Adam
+      _recursive_: true
+    params:
+    - ???  # -> net
+    scheduler:
+      args:
+        _target_: torch.optim.lr_scheduler.ExponentialLR
+        _recursive_: true
+        gamma: ???  # -> 1
+      interval: epoch
+      frequency: 1
+      strict: true
+      monitor: null
+  modules: null
+```
+
+## 3. Training routines definition
+All that remains of the model definition is to write your training routine in the usual PyTorch Lightning style.
+```python
+class ToyModel(BaseModel):
+
+    ...
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return self.net(x)
+
+    def training_step(self, batch, *args, **kwargs) -> torch.Tensor:
+        x, target = batch
+        pred = self.forward(x)
+        loss = self.criterion(pred, target)
+        self.log('loss/train', loss)
+        return loss
+```
+
+## 4. Model Instantiation
+The defined model can be instantiated from the configuration file. Try training and testing your models!
+```python
+from hydra.utils import instantiate
+from omegaconf import OmegaConf
+
+
+def app():
+    ...
+
+    config = OmegaConf.load('config/model/toy.yaml')
+    model = instantiate(config)
+
+    print(model)
+    # ToyModel(
+    #     (net): Linear(in_features=1, out_features=1, bias=True)
+    #     (criterion): CrossEntropyLoss()
+    # )
+
+    trainer.fit(model, ...)
+```
+
+# License
+Hiraishin is licensed under the Apache License, Version 2.0. See [LICENSE](LICENSE) for the full license text.