首页 > 编程语言 > 详细

大文件切割Demo Python

时间:2021-04-02 17:34:37      阅读:30      评论:0      收藏:0      [点我收藏+]

CSV切割demo:实现了按行数切割以及按文件数切割

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2021/4/2 12:17 下午
# @Author  : Xinlong Chen
# @File    : test.py

import math
import os


class CsvSplit:
    """Split a CSV file into smaller files, repeating the header in each one.

    The input is handled as plain text: the first line is the header and
    every following line is treated as one record.  Records whose quoted
    fields contain embedded newlines are therefore not supported.
    """

    def __mkSubFile(self, lines, dir, head, srcName, sub, encoding=None):
        """Write one chunk to ``<dir>/<stem>_<sub><ext>``.

        Args:
            lines: Record lines (each including its line terminator).
            dir: Existing output directory.
            head: Header line written before the records.
            srcName: Path of the source file; only its base name is used to
                derive the output file name.
            sub: 1-based index of this chunk.
            encoding: Text encoding passed to ``open`` (``None`` = default).

        Returns:
            The index to use for the next chunk (``sub + 1``).
        """
        # Use only the base name so that a source such as "data/weibo.csv"
        # is written to "<dir>/weibo_1.csv" rather than to the non-existent
        # "<dir>/data/weibo_1.csv".
        stem, extname = os.path.splitext(os.path.basename(srcName))
        filename = os.path.join(dir, stem + '_' + str(sub) + extname)
        print('make file: %s' % filename)
        with open(filename, 'w', encoding=encoding) as fout:
            fout.write(head)
            fout.writelines(lines)
        return sub + 1

    def __readFromCsv(self, filename, encoding=None):
        """Read the header line and all record lines of *filename*.

        Returns:
            ``(head, lines, status)`` where ``status`` is ``0`` on success
            and ``-1`` (with an empty header and no lines) when *filename*
            is not an existing file.
        """
        if not os.path.isfile(filename):
            print("error filename")
            return "", [], -1
        with open(filename, 'r', encoding=encoding) as file:
            head = file.readline()
            lines = file.readlines()
        return head, lines, 0

    def __writeChunks(self, filename, dir, count, lines, head, encoding=None):
        """Write *lines* into consecutive files of at most *count* records."""
        os.makedirs(dir, exist_ok=True)
        sub = 1
        for start in range(0, len(lines), count):
            chunk = lines[start:start + count]
            sub = self.__mkSubFile(chunk, dir, head, filename, sub, encoding)

    def splitByLineCount(self, filename, dir, count: int, lines=None, head="",
                         encoding=None):
        """Split *filename* into files holding at most *count* records each.

        Args:
            filename: Path of the source CSV; also used to name the outputs.
            dir: Output directory; created if it does not exist.
            count: Maximum number of record lines per output file (>= 1).
            lines: Optional pre-read record lines.  When omitted or empty,
                the header and records are read from *filename*.
            head: Header line to use together with *lines*.
            encoding: Text encoding for reading and writing; ``None`` keeps
                the platform default.

        Raises:
            ValueError: If *count* is smaller than 1.

        If *filename* has to be read but is not an existing file, an error
        message is printed and nothing is written.
        """
        if count < 1:
            raise ValueError("count must be a positive integer, got %r" % (count,))

        if not lines:
            head, lines, status = self.__readFromCsv(filename, encoding)
            if status != 0:
                return

        self.__writeChunks(filename, dir, count, lines, head, encoding)

    def splitByFileCount(self, filename, dir, filecount=10, encoding=None):
        """Split *filename* into at most *filecount* files of near-equal size.

        The records-per-file figure is ``ceil(records / filecount)``, so fewer
        than *filecount* files are produced when the records do not spread
        over all of them.

        Args:
            filename: Path of the source CSV; also used to name the outputs.
            dir: Output directory; created if it does not exist.
            filecount: Upper bound on the number of output files (>= 1).
            encoding: Text encoding for reading and writing; ``None`` keeps
                the platform default.

        Raises:
            ValueError: If *filecount* is smaller than 1.

        If *filename* is not an existing file, an error message is printed
        and nothing is written.
        """
        if filecount < 1:
            raise ValueError(
                "filecount must be a positive integer, got %r" % (filecount,))

        head, lines, status = self.__readFromCsv(filename, encoding)
        if status != 0:
            return

        # max(1, ...) keeps the chunk size valid for a header-only file.
        each_file = max(1, math.ceil(len(lines) / filecount))
        self.__writeChunks(filename, dir, each_file, lines, head, encoding)


if __name__ == '__main__':
    # Demo: split weibo.csv first into (at most) 10 files, then into files
    # of 500000 records each, writing to two separate output directories.
    splitter = CsvSplit()
    splitter.splitByFileCount('weibo.csv', dir='weibo', filecount=10)
    splitter.splitByLineCount('weibo.csv', dir='weibo1', count=500000)

大文件切割Demo Python

原文:https://www.cnblogs.com/cxl-/p/14610236.html

(0)
(0)
   
举报
评论 一句话评论(0
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!