Merge pull request #62 from kerrlabajo/feat/docker-enhancement
Allow a generic dataset YAML config to support different types of datasets regardless of structure
kerrlabajo authored May 16, 2024
2 parents 95f4dac + cd1172e commit d210ebb
Showing 2 changed files with 33 additions and 4 deletions.
3 changes: 3 additions & 0 deletions docker/scripts/pull_build_push.sh
@@ -76,6 +76,9 @@ NEW_TAG="${VERSION}${TAG_BASE}"
# Authenticate Docker to ECR
aws ecr get-login-password --region ${AWS_REGION} | sudo docker login --username AWS --password-stdin ${ECR_URL}

# Check if the repository exists, if not create it
aws ecr describe-repositories --repository-names ${DOCKER_IMAGE} > /dev/null 2>&1 || aws ecr create-repository --repository-name ${DOCKER_IMAGE} > /dev/null 2>&1

# Build and push the image
sudo docker build -t ${ECR_URL}/${DOCKER_IMAGE}:${NEW_TAG} -f ../yolov5-training/Dockerfile ../yolov5-training
sudo docker push ${ECR_URL}/${DOCKER_IMAGE}:${NEW_TAG}
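As context (not part of the commit), a minimal Python sketch of the same check-then-create idea using boto3; the repository name and region below are illustrative placeholders:

import boto3

# Sketch only: ensure an ECR repository exists before pushing, mirroring the
# `describe-repositories || create-repository` fallback in the shell script.
# The repository name and region are placeholders, not values from the script.
ecr = boto3.client("ecr", region_name="us-east-1")
repo_name = "yolov5-training"

try:
    ecr.describe_repositories(repositoryNames=[repo_name])
except ecr.exceptions.RepositoryNotFoundException:
    ecr.create_repository(repositoryName=repo_name)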
34 changes: 30 additions & 4 deletions docker/yolov5-training/configure_dataset.py
@@ -1,6 +1,7 @@
import yaml
import argparse
import os
import glob

# Define the argument parser
parser = argparse.ArgumentParser(description='Configure dataset')
@@ -15,15 +16,40 @@
# Define the file path
FILE_PATH = args.dataset_config_path

# Define the new paths
NEW_PATH = f"/opt/ml/input/data/{DATASET_NAME}"
NEW_TRAIN = f"{NEW_PATH}/images/train"
NEW_VAL = f"{NEW_PATH}/images/train"
# Get the directory from the dataset_config_path
dir_path = os.path.dirname(args.dataset_config_path)

# Check if DATASET_NAME.yaml exists
if not os.path.isfile(FILE_PATH):
    # If not, find any .yaml file in the current directory
    yaml_files = glob.glob(os.path.join(dir_path, '*.yaml'))
    if yaml_files:
        # Rename the first .yaml file to DATASET_NAME.yaml
        os.rename(yaml_files[0], FILE_PATH)
    else:
        raise FileNotFoundError("No .yaml file found to rename")

# Open and load the YAML file
with open(FILE_PATH, 'r') as file:
    data = yaml.safe_load(file)

# Check if DATASET_NAME is in the train and val paths
if DATASET_NAME in data['train'] and DATASET_NAME in data['val']:
    # Extract subdirectories after the dataset name in the original paths
    train_subdirs = data['train'].split(DATASET_NAME, 1)[1]
    val_subdirs = data['val'].split(DATASET_NAME, 1)[1]
elif data['train'].startswith('..') and data['val'].startswith('..'):
    # Remove the '..' from the original paths
    train_subdirs = data['train'][2:]
    val_subdirs = data['val'][2:]
else:
    raise ValueError("Invalid format for train or val paths")

# Define the new paths
NEW_PATH = f"/opt/ml/input/data/{DATASET_NAME}"
NEW_TRAIN = f"{NEW_PATH}{train_subdirs}".replace('\\', '/')
NEW_VAL = f"{NEW_PATH}{val_subdirs}".replace('\\', '/')

# Modify the values
data['path'] = NEW_PATH
data['train'] = NEW_TRAIN
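For illustration only (not part of the commit), a standalone sketch of how the new path rewriting behaves; the dataset name and sample paths below are made up:

# Standalone sketch of the path-rewriting rules added above; "my-dataset"
# and the sample paths are illustrative only.
DATASET_NAME = "my-dataset"
NEW_PATH = f"/opt/ml/input/data/{DATASET_NAME}"

def rewrite(path):
    if DATASET_NAME in path:
        # Keep whatever follows the dataset name, e.g. "/images/train"
        subdirs = path.split(DATASET_NAME, 1)[1]
    elif path.startswith('..'):
        # Drop the leading '..' from relative paths like "../train/images"
        subdirs = path[2:]
    else:
        raise ValueError("Invalid format for train or val paths")
    return f"{NEW_PATH}{subdirs}".replace('\\', '/')

print(rewrite("my-dataset/images/train"))  # /opt/ml/input/data/my-dataset/images/train
print(rewrite("../valid/images"))          # /opt/ml/input/data/my-dataset/valid/images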
