"""Convert an image-folder dataset into an LMDB database of serialized samples."""

import argparse
import os
import pickle
from io import BytesIO

import lmdb
import torch
import torchvision
from tqdm import tqdm

from data.dataset import CARS, ImprovedImageFolder

# Default upper bound on the LMDB memory map: 2 TiB (same value as the
# original hard-coded 1099511627776 * 2).
DEFAULT_MAP_SIZE = 2 * 1024 ** 4


def dataset_to_lmdb(dataset, lmdb_path, *, map_size=DEFAULT_MAP_SIZE):
    """Write every sample of *dataset* into an LMDB database at *lmdb_path*.

    Layout of the resulting database:

    * ``b"0"``, ``b"1"``, ... -- sample ``dataset[i]`` serialized with
      ``torch.save``; the key is the decimal index encoded as bytes.
    * ``b"classes_list"`` -- ``pickle.dumps(dataset.classes_list)``.
    * ``b"__len__"`` -- ``pickle.dumps(len(dataset))``.

    Args:
        dataset: Object supporting ``len()``, integer indexing and exposing a
            ``classes_list`` attribute.
        lmdb_path: Destination path. If it is an existing directory the
            database is opened with ``subdir=True``; otherwise
            ``subdir=False`` is used.
        map_size: Maximum size in bytes the database may grow to.
    """
    env = lmdb.open(
        lmdb_path,
        map_size=map_size,
        subdir=os.path.isdir(lmdb_path),
    )
    try:
        # All writes share one transaction: it commits when the ``with`` body
        # finishes and is aborted if an exception escapes.
        with env.begin(write=True) as txn:
            # Index explicitly rather than iterating the dataset: only
            # ``__len__`` and ``__getitem__`` are relied upon here.
            for index in tqdm(range(len(dataset)), ncols=50):
                buffer = BytesIO()
                torch.save(dataset[index], buffer)
                txn.put(str(index).encode(), buffer.getvalue())
            txn.put(b"classes_list", pickle.dumps(dataset.classes_list))
            txn.put(b"__len__", pickle.dumps(len(dataset)))
    finally:
        # The original never closed the environment; release it even when
        # serialization fails part-way through.
        env.close()


def transform(save_path, dataset_path):
    """Load the image folder at *dataset_path* and store it as LMDB at *save_path*.

    Each image is resized to 256x256, center-cropped to 224, converted to a
    tensor and normalized with the mean/std constants below before being
    written out.

    Args:
        save_path: Destination path of the LMDB database.
        dataset_path: Root path handed to ``ImprovedImageFolder``.
    """
    print(save_path, dataset_path)
    preprocessing = torchvision.transforms.Compose([
        torchvision.transforms.Resize((256, 256)),
        torchvision.transforms.CenterCrop(224),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ])
    # NOTE: an earlier variant built the dataset with
    # CARS("/data/few-shot/STANFORD-CARS/", transform=...) instead; the CARS
    # import is kept so that variant can be restored.
    origin_dataset = ImprovedImageFolder(dataset_path, transform=preprocessing)
    dataset_to_lmdb(origin_dataset, save_path)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="transform dataset to lmdb database")
    parser.add_argument('--save', required=True)
    parser.add_argument('--dataset', required=True)
    args = parser.parse_args()
    transform(args.save, args.dataset)