python labelme dataset class #1511
Replies: 2 comments
-
Certainly! Here's a basic implementation of a LabelMeDataset class.
LabelMeDataset Class Implementation
import os
import json
import numpy as np
import cv2
import tensorflow as tf
from PIL import Image
from sklearn.model_selection import train_test_split
class LabelMeDataset:
    """Dataset helper for a directory of LabelMe-annotated images.

    Scans ``data_dir`` for LabelMe ``.json`` files, loads the image each one
    points at (via its ``imagePath`` field), and exposes the data as NumPy
    arrays plus TensorFlow datasets.

    Args:
        data_dir: Directory containing the LabelMe JSON files and images.
        image_size: Target ``(width, height)`` every image is resized to.
        test_size: Fraction of samples held out for the test split.
        random_state: Seed passed to ``train_test_split`` for reproducibility.
    """

    def __init__(self, data_dir, image_size=(256, 256), test_size=0.2, random_state=42):
        self.data_dir = data_dir
        self.image_size = image_size
        self.test_size = test_size
        self.random_state = random_state
        # Eagerly load everything so later calls are cheap.
        self.images, self.annotations, self.class_distribution = self.load_data()

    def load_data(self):
        """Load images and annotations from ``data_dir``.

        Returns:
            Tuple ``(images, annotations, class_distribution)`` where
            ``images`` is a float array of shape ``(n, H, W, 3)`` in [0, 1],
            ``annotations`` is an object array of per-image shape lists, and
            ``class_distribution`` maps each label to its occurrence count.
        """
        images = []
        annotations = []
        class_distribution = {}
        # sorted() makes the sample order deterministic across filesystems.
        for filename in sorted(os.listdir(self.data_dir)):
            if not filename.endswith('.json'):
                continue
            annotation_path = os.path.join(self.data_dir, filename)
            # LabelMe writes UTF-8 JSON; be explicit so Windows doesn't
            # decode with a legacy locale codec.
            with open(annotation_path, 'r', encoding='utf-8') as f:
                annotation = json.load(f)
            image_path = os.path.join(self.data_dir, annotation['imagePath'])
            if not os.path.exists(image_path):
                # Skip annotations whose image file is missing.
                continue
            # Force RGB: mixing grayscale and RGB images would yield
            # inconsistent array shapes and break np.array(images) below.
            image = Image.open(image_path).convert('RGB')
            image = image.resize(self.image_size)
            images.append(np.asarray(image, dtype=np.float32) / 255.0)  # normalize to [0, 1]
            shapes = annotation['shapes']
            for shape in shapes:
                label = shape['label']
                class_distribution[label] = class_distribution.get(label, 0) + 1
            annotations.append(shapes)
        images = np.array(images)
        # Each image has a different number of shapes, so the annotation list
        # is ragged; NumPy >= 1.24 raises ValueError unless dtype=object is
        # given explicitly.
        annotations = np.array(annotations, dtype=object)
        return images, annotations, class_distribution

    def show_class_distribution(self):
        """Print how many times each label occurs across the dataset."""
        print("Class Distribution:")
        for label, count in self.class_distribution.items():
            print(f"{label}: {count}")

    def get_datasets(self):
        """Split the data and wrap both splits as ``tf.data.Dataset``s.

        Returns:
            Tuple ``(train_dataset, test_dataset)``.

        NOTE(review): the annotations are ragged lists of dicts —
        ``from_tensor_slices`` requires tensor-convertible labels, so callers
        will likely need to encode them first; verify against your pipeline.
        """
        X_train, X_test, y_train, y_test = train_test_split(
            self.images, self.annotations, test_size=self.test_size, random_state=self.random_state
        )
        train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
        test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))
        return train_dataset, test_dataset
# Example usage
if __name__ == "__main__":
    # Fix: the constructor parameter is `data_dir`, not `path` — the original
    # call `LabelMeDataset(path=...)` raised TypeError.
    dataset = LabelMeDataset(data_dir='path/to/your/labelme/dataset')
    dataset.show_class_distribution()
    train_dataset, test_dataset = dataset.get_datasets()
    # Now you can use train_dataset and test_dataset with TensorFlow for training.
    # Shuffle BEFORE batching so individual samples are shuffled; shuffling
    # after .batch() only reorders whole 32-sample batches.
    train_dataset = train_dataset.shuffle(buffer_size=1000).batch(32)
    test_dataset = test_dataset.batch(32)
    # Your training loop here
Explanation
Usage
This implementation should help you get started with using the labelme dataset class.
Beta Was this translation helpful? Give feedback.
-
Bah voilà ! 👌
Le ven. 29 nov. 2024, 16:41, huanxi0319 ***@***.***> a écrit :
… Certainly! Working with the labelme dataset can indeed be streamlined by
using a pre-built dataset class. While there might not be an official
LabelMeDataset class, you can create or use a community-contributed one
that simplifies the process of loading images and annotations, showing
class distribution, and integrating with TensorFlow for training.
Here's a basic implementation of a LabelMeDataset class that you can use.
This class will handle loading images, parsing annotations, and providing a
convenient interface for TensorFlow.
LabelMeDataset Class Implementation
import os
import json
import numpy as np
import cv2
import tensorflow as tf
from PIL import Image
from sklearn.model_selection import train_test_split
class LabelMeDataset:
    """Dataset helper for a directory of LabelMe-annotated images.

    Scans ``data_dir`` for LabelMe ``.json`` files, loads the image each one
    points at (via its ``imagePath`` field), and exposes the data as NumPy
    arrays plus TensorFlow datasets.

    Args:
        data_dir: Directory containing the LabelMe JSON files and images.
        image_size: Target ``(width, height)`` every image is resized to.
        test_size: Fraction of samples held out for the test split.
        random_state: Seed passed to ``train_test_split`` for reproducibility.
    """

    def __init__(self, data_dir, image_size=(256, 256), test_size=0.2, random_state=42):
        self.data_dir = data_dir
        self.image_size = image_size
        self.test_size = test_size
        self.random_state = random_state
        # Eagerly load everything so later calls are cheap.
        self.images, self.annotations, self.class_distribution = self.load_data()

    def load_data(self):
        """Load images and annotations from ``data_dir``.

        Returns:
            Tuple ``(images, annotations, class_distribution)`` where
            ``images`` is a float array of shape ``(n, H, W, 3)`` in [0, 1],
            ``annotations`` is an object array of per-image shape lists, and
            ``class_distribution`` maps each label to its occurrence count.
        """
        images = []
        annotations = []
        class_distribution = {}
        # sorted() makes the sample order deterministic across filesystems.
        for filename in sorted(os.listdir(self.data_dir)):
            if not filename.endswith('.json'):
                continue
            annotation_path = os.path.join(self.data_dir, filename)
            # LabelMe writes UTF-8 JSON; be explicit so Windows doesn't
            # decode with a legacy locale codec.
            with open(annotation_path, 'r', encoding='utf-8') as f:
                annotation = json.load(f)
            image_path = os.path.join(self.data_dir, annotation['imagePath'])
            if not os.path.exists(image_path):
                # Skip annotations whose image file is missing.
                continue
            # Force RGB: mixing grayscale and RGB images would yield
            # inconsistent array shapes and break np.array(images) below.
            image = Image.open(image_path).convert('RGB')
            image = image.resize(self.image_size)
            images.append(np.asarray(image, dtype=np.float32) / 255.0)  # normalize to [0, 1]
            shapes = annotation['shapes']
            for shape in shapes:
                label = shape['label']
                class_distribution[label] = class_distribution.get(label, 0) + 1
            annotations.append(shapes)
        images = np.array(images)
        # Each image has a different number of shapes, so the annotation list
        # is ragged; NumPy >= 1.24 raises ValueError unless dtype=object is
        # given explicitly.
        annotations = np.array(annotations, dtype=object)
        return images, annotations, class_distribution

    def show_class_distribution(self):
        """Print how many times each label occurs across the dataset."""
        print("Class Distribution:")
        for label, count in self.class_distribution.items():
            print(f"{label}: {count}")

    def get_datasets(self):
        """Split the data and wrap both splits as ``tf.data.Dataset``s.

        Returns:
            Tuple ``(train_dataset, test_dataset)``.

        NOTE(review): the annotations are ragged lists of dicts —
        ``from_tensor_slices`` requires tensor-convertible labels, so callers
        will likely need to encode them first; verify against your pipeline.
        """
        X_train, X_test, y_train, y_test = train_test_split(
            self.images, self.annotations, test_size=self.test_size, random_state=self.random_state
        )
        train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
        test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))
        return train_dataset, test_dataset
# Example usage
if __name__ == "__main__":
    # Fix: the constructor parameter is `data_dir`, not `path` — the original
    # call `LabelMeDataset(path=...)` raised TypeError.
    dataset = LabelMeDataset(data_dir='path/to/your/labelme/dataset')
    dataset.show_class_distribution()
    train_dataset, test_dataset = dataset.get_datasets()
    # Now you can use train_dataset and test_dataset with TensorFlow for training.
    # Shuffle BEFORE batching so individual samples are shuffled; shuffling
    # after .batch() only reorders whole 32-sample batches.
    train_dataset = train_dataset.shuffle(buffer_size=1000).batch(32)
    test_dataset = test_dataset.batch(32)
    # Your training loop here
Explanation
1.
*Initialization*:
- The LabelMeDataset class is initialized with the path to the dataset
directory, the desired image size, and the test size for splitting the
dataset.
2.
*Loading Data*:
- The load_data method loads images and annotations from the dataset
directory. It also calculates the class distribution.
- Images are resized and normalized to the range [0, 1].
- Annotations are parsed, and the class distribution is updated.
3.
*Showing Class Distribution*:
- The show_class_distribution method prints the class distribution.
4.
*Getting Datasets*:
- The get_datasets method splits the data into training and testing
sets and converts them into TensorFlow datasets.
Usage
- Instantiate the LabelMeDataset class with the path to your dataset.
- Call show_class_distribution to see the class distribution.
- Use get_datasets to get the training and testing datasets, which can
be used directly with TensorFlow for training.
This implementation should help you get started with using the labelme
dataset in a more straightforward manner. If you need additional features
or have specific requirements, you can extend this class accordingly.
—
Reply to this email directly, view it on GitHub
<#1511 (comment)>,
or unsubscribe
<https://github.com/notifications/unsubscribe-auth/AAA57OYY5DTUEPROYSUMPCT2DCDJFAVCNFSM6AAAAABRT62XAWVHI2DSMVQWIX3LMV43URDJONRXK43TNFXW4Q3PNVWWK3TUHMYTCNBRG4YTMNQ>
.
You are receiving this because you authored the thread.Message ID:
***@***.***>
|
Beta Was this translation helpful? Give feedback.
-
Hi,
To use labelme images, it looks like everyone is writing their own Python dataset class, to:
load images
load annotations
show class distribution
....
This is really not straightforward.
Is there somewhere a labelmedataset class I can directly use, to do it ?
I will only have to instantiate it using mydataset = LabelMeDataset(path) , and use it directly with TF for training.
Beta Was this translation helpful? Give feedback.
All reactions