首页 > 其他 > 详细

将xml文件转为txt文件

时间:2019-10-23 17:11:48      阅读:941      评论:0      收藏:0      [点我收藏+]
import os
import re
import sys
import glob
import xml.etree.ElementTree as ET

# Matches HTML-like opening/closing tags embedded in the element text.
_HTML_TAG_RE = re.compile(r'</?\w+[^>]*>')


def _clean_text(raw):
    """Strip HTML-like tags from *raw*, turn ``&nbsp;`` into spaces, and trim."""
    return _HTML_TAG_RE.sub('', raw).replace("&nbsp;", " ").strip()


def xml_to_txt(indir, outdir):
    """Convert every ``*.xml`` file in *indir* to a ``.txt`` file in *outdir*.

    For each ``PostItem`` element in a document, the text of every descendant
    element whose tag contains ``content`` or ``caption`` is cleaned (HTML-like
    tags removed, ``&nbsp;`` replaced by a space, surrounding whitespace
    stripped), echoed to stdout, and written as one line of the output file.
    Elements with no text are skipped.

    The output file is named after the input file with its final extension
    replaced by ``.txt`` (``a.b.xml`` -> ``a.b.txt``).

    Args:
        indir: Directory that is searched (non-recursively) for ``*.xml``.
        outdir: Existing directory that receives the ``.txt`` files.

    Raises:
        xml.etree.ElementTree.ParseError: If an input file is not valid XML.
        OSError: If an input cannot be read or an output cannot be written.
    """
    # Glob inside indir directly instead of os.chdir(): the process working
    # directory stays untouched, and a relative outdir keeps its meaning.
    # glob.escape() keeps characters such as '[' in indir literal.
    pattern = os.path.join(glob.escape(indir), '*.xml')

    # Sorted so files are processed in a deterministic order.
    for xml_path in sorted(glob.glob(pattern)):
        # splitext() removes only the final extension, so dotted names such
        # as "a.b.xml" are not truncated to "a.txt".
        stem = os.path.splitext(os.path.basename(xml_path))[0]
        txt_path = os.path.join(outdir, stem + '.txt')

        root = ET.parse(xml_path).getroot()

        # The context manager guarantees the output file is flushed and closed.
        with open(txt_path, 'w', encoding="utf-8") as f_w:
            for obj in root.iter('PostItem'):
                for ele in obj.iter():
                    for key in ("content", "caption"):
                        # Use the matched element's own text; looking the tag
                        # up again on the PostItem repeats the first child's
                        # text and fails when there is no such direct child.
                        if key in ele.tag and ele.text:
                            text = _clean_text(ele.text)
                            print(text)
                            f_w.write(text + "\n")

# Raw strings keep the Windows backslashes literal; "\D" and "\d" in a normal
# string literal are invalid escape sequences.
indir = r'E:\Data\demo-xml'  # directory containing the input .xml files
outdir = r'E:\Data\demo-txt'  # directory that receives the .txt files

# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    xml_to_txt(indir, outdir)

将xml文件转为txt文件

原文:https://www.cnblogs.com/hankleo/p/11727219.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!