storage init container changes, fixing imports
deepanker13 committed Jan 10, 2024
1 parent 76fb00b commit 2b72670
Showing 7 changed files with 47 additions and 35 deletions.
4 changes: 2 additions & 2 deletions sdk/python/kubeflow/storage_init_container/Dockerfile
@@ -5,7 +5,7 @@ FROM python:3.11
 WORKDIR /app

 # Copy the Python package and its source code into the container
-COPY . /app/storage
+COPY . /app/storage_init_container

 # Copy the requirements.txt file into the container
 COPY requirements.txt /app/requirements.txt
@@ -14,4 +14,4 @@ COPY requirements.txt /app/requirements.txt
 RUN pip install --no-cache-dir -r requirements.txt

 # Run storage.py when the container launches
-ENTRYPOINT ["python", "storage/storage.py"]
+ENTRYPOINT ["python", "-m", "storage_init_container.storage"]
1 change: 1 addition & 0 deletions sdk/python/kubeflow/storage_init_container/constants.py
@@ -0,0 +1 @@
+INIT_CONTAINER_MOUNT_PATH = "/workspace"
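The new constants module gives the mount path a single home; for illustration, the datasets directory that s3.py below derives from it:

# Illustration only: compose the default download directory from the shared
# constant, mirroring the S3DatasetParams default in this commit.
import os
from kubeflow.storage_init_container.constants import INIT_CONTAINER_MOUNT_PATH

datasets_dir = os.path.join(INIT_CONTAINER_MOUNT_PATH, "datasets")
print(datasets_dir)  # -> /workspace/datasets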
30 changes: 24 additions & 6 deletions sdk/python/kubeflow/storage_init_container/hugging_face.py
@@ -1,8 +1,22 @@
-from kubeflow.storage_init_container.types import *
-from kubeflow.storage_init_container.abstract_model_provider import modelProvider
-from kubeflow.storage_init_container.abstract_dataset_provider import datasetProvider
-
-INIT_CONTAINER_MOUNT_PATH = "/workspace"
+from dataclasses import dataclass, field
+import transformers
+from peft import LoraConfig
+from urllib.parse import urlparse
+import json, os
+from typing import Union
+from .constants import INIT_CONTAINER_MOUNT_PATH
+from .abstract_model_provider import modelProvider
+from .abstract_dataset_provider import datasetProvider
+
+
+TRANSFORMER_TYPES = Union[
+    transformers.AutoModelForSequenceClassification,
+    transformers.AutoModelForTokenClassification,
+    transformers.AutoModelForQuestionAnswering,
+    transformers.AutoModelForCausalLM,
+    transformers.AutoModelForMaskedLM,
+    transformers.AutoModelForImageClassification,
+]


 @dataclass
@@ -28,7 +42,9 @@ def download_dir(self, value):

 @dataclass
 class HuggingFaceTrainParams:
-    training_parameters: TrainingArguments = field(default_factory=TrainingArguments)
+    training_parameters: transformers.TrainingArguments = field(
+        default_factory=transformers.TrainingArguments
+    )
     lora_config: LoraConfig = field(default_factory=LoraConfig)

@@ -82,6 +98,8 @@ def load_config(self, serialised_args):

     def download_dataset(self):
         print("downloading dataset")
+        import huggingface_hub
+        from datasets import load_dataset

         if self.config.access_token:
             huggingface_hub.login(self.config.access_token)
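With training_parameters now typed against transformers.TrainingArguments directly, a caller might build the params like this; a hedged sketch whose argument values are assumptions, not code from this commit:

# Illustrative construction of HuggingFaceTrainParams; output_dir and the
# LoRA hyperparameters are placeholder values.
import transformers
from peft import LoraConfig
from kubeflow.storage_init_container.hugging_face import HuggingFaceTrainParams

train_params = HuggingFaceTrainParams(
    training_parameters=transformers.TrainingArguments(output_dir="/workspace/output"),
    lora_config=LoraConfig(r=8, lora_alpha=16),
)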
4 changes: 1 addition & 3 deletions sdk/python/kubeflow/storage_init_container/requirements.txt
@@ -5,6 +5,4 @@ transformers>=4.20.0
 peft>=0.3.0
 huggingface_hub==0.16.4
 datasets>=2.13.2
-torch>=1.13.1
-torchvision>=0.9.1
-torchaudio>=0.8.1
+
20 changes: 17 additions & 3 deletions sdk/python/kubeflow/storage_init_container/s3.py
@@ -1,5 +1,9 @@
-from .types import *
-from kubeflow.storage_init_container.abstract_dataset_provider import datasetProvider
+from dataclasses import dataclass, field
+import json, os
+import boto3
+from urllib.parse import urlparse
+from .abstract_dataset_provider import datasetProvider
+from .constants import INIT_CONTAINER_MOUNT_PATH


 @dataclass
@@ -10,7 +14,9 @@ class S3DatasetParams:
     region_name: str = None
     access_key: str = None
     secret_key: str = None
-    download_dir: str = field(default="/workspace/datasets")
+    download_dir: str = field(
+        default=os.path.join(INIT_CONTAINER_MOUNT_PATH, "datasets")
+    )

     def is_valid_url(self, url):
         try:
@@ -30,6 +36,14 @@ def __post_init__(self):
             raise ValueError("bucket_name or endpoint_url or file_key is None")
         self.is_valid_url(self.endpoint_url)

+    @property
+    def download_dir(self):
+        return self.download_dir
+
+    @download_dir.setter
+    def download_dir(self, value):
+        raise AttributeError("Cannot modify read-only field 'download_dir'")
+

 class S3(datasetProvider):
     def load_config(self, serialised_args):
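Note that the getter added above returns self.download_dir, which re-enters the property rather than reading the dataclass field; the conventional read-only variant keeps a private backing attribute. A minimal sketch of that pattern, independent of this commit:

# Conventional read-only dataclass field with a private backing attribute;
# this is a generic pattern, not the code committed above.
from dataclasses import dataclass, field

@dataclass
class ReadOnlyDownloadDir:
    _download_dir: str = field(default="/workspace/datasets")

    @property
    def download_dir(self):
        return self._download_dir

    @download_dir.setter
    def download_dir(self, value):
        raise AttributeError("Cannot modify read-only field 'download_dir'")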
4 changes: 2 additions & 2 deletions sdk/python/kubeflow/storage_init_container/storage.py
@@ -1,6 +1,6 @@
 import argparse
-from hugging_face import HuggingFace, HuggingFaceDataset
-from s3 import S3
+from .hugging_face import HuggingFace, HuggingFaceDataset
+from .s3 import S3


 def model_factory(model_provider, model_provider_parameters):
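The imports become package-relative to match the new -m entrypoint. The factory body is elided from this diff; a hedged sketch of the dispatch it suggests, in which the provider string "hf" and everything except model_factory, HuggingFace, and load_config are assumptions:

# Hypothetical sketch only; the committed body is not shown in this diff.
def model_factory(model_provider, model_provider_parameters):
    if model_provider == "hf":
        hf = HuggingFace()
        hf.load_config(model_provider_parameters)
        return hf
    raise ValueError(f"Unsupported model provider: {model_provider}")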
19 changes: 0 additions & 19 deletions sdk/python/kubeflow/storage_init_container/types.py

This file was deleted.
