https://github.com/zhangbo2008/perfect_batch_generator_for_pyton
核心代码如下:
def bylineread(fimename, batchsize=1):
    """Lazily read a text file and yield its lines in batches.

    The file is consumed one line at a time, so only the current batch is
    held in memory regardless of the file size.

    Args:
        fimename: Path of the text file to read (opened with ``open``'s
            default mode and encoding).
        batchsize: Number of lines per batch. A value below 1 never
            triggers a flush, so all lines end up in one single batch.

    Yields:
        A fresh list of lines (line endings preserved). Every batch has
        exactly ``batchsize`` lines except possibly the last one, which
        holds the remaining lines. An empty batch is never yielded.
    """
    with open(fimename) as f:
        out = []
        for line in f:
            out.append(line)
            if len(out) == batchsize:
                yield out
                out = []  # rebind so the consumer keeps its own list
    # Flush the trailing partial batch; skip it when nothing is left so
    # callers never receive a spurious empty list.
    if out:
        yield out


# Demo: only runs when the file is executed as a script, not on import.
if __name__ == "__main__":
    # `read` is a generator object; the file is not opened until iteration.
    read = bylineread('1', batchsize=2)
    # The for-loop stops on StopIteration only, so real errors (for example
    # a missing file) are reported instead of being silently swallowed.
    for batch in read:
        print(batch)
    print('over')
def bylineread(fimename, batchsize=1):
    """Lazily read a text file and yield its lines in batches.

    The file is consumed one line at a time, so only the current batch is
    held in memory regardless of the file size.

    Args:
        fimename: Path of the text file to read (opened with ``open``'s
            default mode and encoding).
        batchsize: Number of lines per batch. A value below 1 never
            triggers a flush, so all lines end up in one single batch.

    Yields:
        A fresh list of lines (line endings preserved). Every batch has
        exactly ``batchsize`` lines except possibly the last one, which
        holds the remaining lines. An empty batch is never yielded.
    """
    with open(fimename) as f:
        out = []
        for line in f:
            out.append(line)
            if len(out) == batchsize:
                yield out
                out = []  # rebind so the consumer keeps its own list
    # Flush the trailing partial batch; skip it when nothing is left so
    # callers never receive a spurious empty list.
    if out:
        yield out
# Demo: only runs when the file is executed as a script, not on import.
if __name__ == "__main__":
    # `read` is a generator object; the file is not opened until iteration.
    read = bylineread('1', batchsize=2)
    # The for-loop stops on StopIteration only, so real errors (for example
    # a missing file) are reported instead of being silently swallowed.
    for batch in read:
        print(batch)
    print('over')
带 batch_size 的迭代器按批读取文件,完美解决内存不足时的大数据处理问题。
原文:https://www.cnblogs.com/zhangbo2008/p/13373587.html