I am working on an image retrieval project. To make the model fairer, I want to construct batches that contain:
5 images per class and 75 images per batch.
I have 300 classes in total in my dataset, so obviously only 15 classes can be represented in each batch. The data is balanced, meaning there is an equal number of images per class. I am using PyTorch.
I have created a PyTorch dataset, and I want to add the above functionality to my ImageFolderLoader class, whose code I have added below.
# File-name suffixes treated as images. is_image_file() compares them
# case-insensitively, so the upper-case entries are kept only for code that
# reads this list directly.
IMG_EXTENSIONS = [
    '.jpg', '.JPG', '.jpeg', '.JPEG',
    '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP',
]


def is_image_file(filename):
    """Return True if *filename* ends with one of ``IMG_EXTENSIONS``.

    The comparison ignores case, so mixed-case suffixes such as ``.Jpg`` or
    ``.JpEg`` are accepted in addition to the exact spellings in the list.

    Args:
        filename: File name (or path) to check.

    Returns:
        True when the name carries a known image suffix, otherwise False.
    """
    # Built on every call so later changes to IMG_EXTENSIONS are honoured;
    # str.endswith accepts a tuple and tests all suffixes in one call.
    suffixes = tuple(extension.lower() for extension in IMG_EXTENSIONS)
    return filename.lower().endswith(suffixes)
def find_classes(dir):
    """Discover the class folders located directly under *dir*.

    Args:
        dir: Path of the dataset root; every sub-directory is one class.

    Returns:
        A ``(classes, class_to_idx)`` pair.

        ``class_to_idx`` maps each sub-directory name to its position in the
        alphabetically sorted list of sub-directories.

        ``classes`` holds one display name per sub-directory, in the same
        order: the text between the first and second ``'.'`` of the folder
        name (``"001.cat"`` -> ``"cat"``), or the whole folder name when it
        contains no ``'.'``.
    """
    # Only directories are classes. A stray file in the root would otherwise
    # take a slot and shift every following class index, even though
    # make_dataset() skips non-directories.
    folders = sorted(
        entry for entry in os.listdir(dir)
        if os.path.isdir(os.path.join(dir, entry))
    )
    class_to_idx = {folder: index for index, folder in enumerate(folders)}

    classes = []
    for folder in folders:
        parts = folder.split('.')
        # Fall back to the full name instead of raising IndexError when the
        # folder does not follow the "<prefix>.<name>" pattern.
        classes.append(parts[1] if len(parts) > 1 else folder)
    return classes, class_to_idx
def make_dataset(dir, class_to_idx):
    """Collect ``(relative_path, class_index)`` pairs for the images in *dir*.

    Args:
        dir: Dataset root whose sub-directories each hold one class.
        class_to_idx: Mapping from sub-directory name to class index.

    Returns:
        A list of ``(path, index)`` tuples, where ``path`` has the form
        ``"<class folder>/<file name>"`` relative to *dir*. Entries are
        ordered by class folder name, then by file name.

    Raises:
        KeyError: If a sub-directory that contains at least one image file
            is missing from *class_to_idx*.
    """
    images = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # index -> sample mapping identical across runs and machines.
    for target in sorted(os.listdir(dir)):
        d = os.path.join(dir, target)
        if not os.path.isdir(d):
            continue
        # The class_to_idx lookup stays inside the generator so that an
        # unknown folder only fails when it actually holds an image.
        images.extend(
            ('{0}/{1}'.format(target, filename), class_to_idx[target])
            for filename in sorted(os.listdir(d))
            if is_image_file(filename)
        )
    return images
def default_loader(path):
    """Load the image stored at *path* and return it converted to RGB.

    Args:
        path: Location of the image file on disk.

    Returns:
        The result of ``Image.open(...).convert('RGB')``.
        NOTE(review): ``Image`` is presumably ``PIL.Image``, imported
        elsewhere in the file; confirm.
    """
    # Open the file ourselves so the handle is closed deterministically when
    # the block exits, rather than whenever the image object is collected.
    # convert() runs inside the block, while the file is still open.
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert('RGB')
class ImageFolderLoader(Dataset):
    """Dataset over a folder tree with one sub-directory per class.

    Each item is an ``(image, class_index)`` pair. Images are read on access
    through ``loader`` and, when a ``transform`` is given, passed through it.
    """

    def __init__(self, root, transform=None, loader=default_loader,):
        """Index the images below *root*.

        Args:
            root: Dataset root directory.
            transform: Optional callable applied to every loaded image.
            loader: Callable that turns a file path into an image.
        """
        self.root = root
        self.transform = transform
        self.loader = loader
        # Scan the directory once up front; items are loaded lazily later.
        self.classes, self.class_to_idx = find_classes(root)
        self.imgs = make_dataset(root, self.class_to_idx)

    def __getitem__(self, index):
        """Return the ``(image, class_index)`` pair stored at *index*."""
        relative_path, label = self.imgs[index]
        # Stored paths are relative to the root, so rebuild the full path.
        image = self.loader(os.path.join(self.root, relative_path))
        transform = self.transform
        if transform is None:
            return image, label
        return transform(image), label

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.imgs)
If there is a way to do this, please let me know.
Edit: For anyone who wants to see the solution, I have added it below after solving this problem.