# encoding: utf-8
# Python 3.6.0
"""Convert the ``*.xml`` files in the current directory to tab-separated text.

For every regular file in ``path`` whose name ends in ``.xml`` (case
insensitive), the file is read as UTF-8 and each ``<ID…> … </ID…>`` record
found in it is turned into one output line of five tab-separated fields:

    UserType, UserVendorId, UserSubType, Flags, comment text

A field that is not found in a record is written as an empty string.  The
result is written next to the input as ``<name>.xml.txt``, encoded as GB2312
with CRLF line endings.
"""

import os
import codecs
import re

# Directory that is scanned for ``*.xml`` files (not recursive).
path = '.'

# One record: everything from an opening <ID…> tag to the nearest closing
# </ID…> tag (non-greedy, may span several lines).
RECORD_PATTERN = re.compile(r'<ID\d*>[\s\S]+?<\/ID\d*>')

# One pattern per output column, in output order.  The first four capture the
# run of non-'>' characters that directly precedes the named closing tag; the
# last one captures the run of non-'>' characters between '<!--' and '-->'.
FIELD_PATTERNS = (
    re.compile(r'[^>]+(?=<\/UserType>)'),
    re.compile(r'[^>]+(?=<\/UserVendorId>)'),
    re.compile(r'[^>]+(?=<\/UserSubType>)'),
    re.compile(r'[^>]+(?=<\/Flags>)'),
    re.compile(r'(?<=<!--)[^>]+(?=-->)'),
)


def extract_row(record: str) -> str:
    """Return the tab-separated output line (no line ending) for one record.

    Each column holds the first match of the corresponding entry in
    ``FIELD_PATTERNS`` inside *record*, or ``''`` when there is no match.
    """
    columns = []
    for pattern in FIELD_PATTERNS:
        found = pattern.search(record)
        columns.append(found.group(0) if found else '')
    return '\t'.join(columns)


def extract_rows(text: str):
    """Return a list with one output line per ``<ID…>`` record in *text*."""
    return [extract_row(record) for record in RECORD_PATTERN.findall(text)]


def convert_file(xml_path: str) -> str:
    """Convert one XML file and return the path of the text file written.

    The input is decoded as UTF-8; the output ``xml_path + '.txt'`` is encoded
    as GB2312.  The output file is created even when no record is found.

    Raises:
        UnicodeDecodeError: the input is not valid UTF-8.
        UnicodeEncodeError: an extracted field contains a character that
            GB2312 cannot represent.
        OSError: either file cannot be opened.
    """
    # ``with`` guarantees both handles are closed even if decoding or
    # encoding fails part-way through.
    with codecs.open(xml_path, 'r', 'utf-8') as source:
        text = source.read()

    txt_path = xml_path + '.txt'
    with codecs.open(txt_path, 'w', 'gb2312') as target:
        for row in extract_rows(text):
            # codecs.open works in binary mode underneath, so the explicit
            # '\r\n' is written verbatim on every platform.
            target.write(row + '\r\n')
    return txt_path


def convert_directory(directory: str) -> None:
    """Convert every ``*.xml`` regular file directly inside *directory*."""
    for name in os.listdir(directory):
        xml_path = os.path.join(directory, name)
        if os.path.isfile(xml_path) and name.lower().endswith('.xml'):
            print(xml_path)
            convert_file(xml_path)


def main() -> None:
    """Script entry point: convert the XML files found in ``path``."""
    convert_directory(path)


if __name__ == '__main__':
    main()
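# "Copy code" -- leftover button label from the web page this script was pasted from; not part of the script.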