CelebA 数据集总共有 202599 张图片,这里划分为训练集 162770 张(约占 0.803)、验证集 19867 张(约占 0.098)、测试集 19962 张(约占 0.099)。
这里采用建立符号链接(symlink)的方式来划分数据集。
# Excerpt from check_link: in_file, out_dir and link_file are defined there.
# Relative path that leads from out_dir (the directory that will hold the
# link) to in_file (the real image file).
rel_link = os.path.relpath(in_file, out_dir)
# Create link_file as a symbolic link whose target is rel_link, i.e. the new
# path in the split directory points back at the source image.
os.symlink(rel_link, link_file)
import os

# Boundaries of the standard CelebA partition. Image numbers are 1-based, so
# images 1..162770 are train, 162771..182637 are valid, 182638..202599 are test.
NUM_EXAMPLES = 202599
TRAIN_STOP = 162770
VALID_STOP = 182637


def check_link(in_dir, basename, out_dir):
    """Symlink ``out_dir/basename`` to ``in_dir/basename`` if the source exists.

    The link target is stored as a path relative to ``out_dir``. Nothing is
    done when the source file is missing or when the link path already exists.

    Args:
        in_dir: Directory holding the real file.
        basename: File name, used for both the source and the link.
        out_dir: Existing directory in which the link is created.
    """
    in_file = os.path.join(in_dir, basename)
    if not os.path.exists(in_file):
        return
    link_file = os.path.join(out_dir, basename)
    # The target must be relative to the directory containing the link,
    # not to the current working directory.
    rel_link = os.path.relpath(in_file, out_dir)
    try:
        os.symlink(rel_link, link_file)
    except FileExistsError:
        # Already linked by an earlier run; keep the script re-runnable.
        pass


def add_splits(data_path, num_examples=NUM_EXAMPLES, train_stop=TRAIN_STOP,
               valid_stop=VALID_STOP):
    """Create ``splits/train``, ``splits/valid`` and ``splits/test`` symlink dirs.

    Images are looked up as ``<data_path>/Img/img_align_celeba/NNNNNN.jpg``
    with a 1-based, zero-padded six-digit number. Numbers that have no file
    on disk are skipped.

    Args:
        data_path: Dataset root containing ``Img/img_align_celeba``.
        num_examples: Total number of images; the test split ends here.
        train_stop: Number of the last training image.
        valid_stop: Number of the last validation image.
    """
    images_path = os.path.join(data_path, 'Img/img_align_celeba')
    # (split name, first 0-based index, one-past-last 0-based index)
    splits = (
        ('train', 0, train_stop),
        ('valid', train_stop, valid_stop),
        ('test', valid_stop, num_examples),
    )

    # Create every split directory up front, before any link is made.
    split_dirs = {}
    for split_name, _, _ in splits:
        split_dir = os.path.join(data_path, 'splits', split_name)
        os.makedirs(split_dir, exist_ok=True)
        split_dirs[split_name] = split_dir

    for split_name, start, stop in splits:
        for i in range(start, stop):
            # File names are 1-based while the ranges are 0-based.
            basename = "{:06d}.jpg".format(i + 1)
            check_link(images_path, basename, split_dirs[split_name])


if __name__ == '__main__':
    base_path = '../DATA/CelebA'
    add_splits(base_path)
原文:https://www.cnblogs.com/Overture/p/14626927.html