#_*_ coding:UTF-8 _*_
import sys,io,os,re
import MySQLdb
def VisitDir(path):
    """Return the paths of all files found under ``path``, recursively.

    Args:
        path: Directory to walk with ``os.walk``.

    Returns:
        A list with one entry per file, each built by joining the directory
        that contains the file with the file name. Directories themselves are
        not included. The list is empty when ``path`` holds no files (or does
        not exist, since ``os.walk`` ignores listing errors by default).
    """
    return [
        os.path.join(root, filename)
        for root, _dirs, files in os.walk(path)
        for filename in files
    ]
def addItem(uci,talk_time,content,spider_time,name,majia):
    """Insert one row into the ``qq`` table and commit it.

    Relies on the module-level names ``cur`` (cursor) and ``conn``
    (connection); both must be bound before this function is called.

    Args:
        uci: Value stored in the ``UIN`` column.
        talk_time: Value stored in the ``SEND_TIME`` column.
        content: Value stored in the ``CONTENT`` column.
        spider_time: Value stored in the ``SPIDER_TIME`` column.
        name: Value stored in the ``GROUP_UIN`` column.
        majia: Value stored in the ``MAJIA`` column.

    Returns:
        None. A failing insert or commit is printed and otherwise ignored so
        that one bad row does not stop the caller.
    """
    sql='insert into qq(UIN,SEND_TIME,CONTENT,SPIDER_TIME,GROUP_UIN,MAJIA) values(%s,%s,%s,%s,%s,%s)'
    # Parameters are passed separately so the driver does the quoting.
    params = (uci, talk_time, content, spider_time, name, majia)
    try:
        cur.execute(sql, params)
        conn.commit()
    except Exception as e:
        # Deliberately best-effort: report the failure and carry on with the
        # next row. (The former outer ``except MySQLdb.Error`` / ``sys.exit()``
        # handler was unreachable because this handler already catches every
        # exception raised here, so it has been dropped.)
        print(e)
# Import every file found under `path` into the `qq` table of the local MySQL
# database `test`, one row per tab-separated line.
#
# NOTE(review): the bare numeric literals of this script (the port, the loop
# bounds and the column indices) were missing from the source as received.
# The values used below -- port 3306, "every file", columns 0/1/2 -- are
# reconstructions; confirm them against the real data before running.
path = r"C:\Users\hexu\Desktop\20160520"
pathall = VisitDir(path)

# The path patterns never change, so compile them once instead of per file.
majia_pattern = re.compile(r'\d*(?=\\mr)')  # digits directly before "\mr"
spider_time_pattern = re.compile(r'(?<=Desktop\\)\d{8}')  # 8 digits after "Desktop\"
name_pattern = re.compile(r'(?<=mr_troop_).*')  # everything after "mr_troop_"

# TODO confirm: 3306 is the MySQL default port; the original value was lost.
conn = MySQLdb.connect(host='localhost', user='root', passwd='123456', db='test', port=3306)
conn.set_character_set('utf8')
# addItem() reads the module-level names `cur` and `conn`, so both must be
# bound before the first call. One cursor is enough for the whole run.
cur = conn.cursor()
try:
    for file_path in pathall:
        print(file_path)

        majia_match = majia_pattern.search(file_path)
        spider_time_match = spider_time_pattern.search(file_path)
        name_match = name_pattern.search(file_path)
        if majia_match is None or spider_time_match is None or name_match is None:
            # The path does not have the expected layout; skip the file
            # instead of failing on `None.group()`.
            print('skipped (unexpected path layout): ' + file_path)
            continue
        majia = majia_match.group()
        spider_time = spider_time_match.group()
        name = name_match.group()

        # `with` closes every file, not just the last one opened.
        with open(file_path, "r") as f:
            for line in f:
                # Drop the row terminator so it is not stored in the last field.
                content = line.rstrip("\r\n").split("\t")
                if len(content) < 3:
                    # Blank or truncated row: not enough columns to insert.
                    continue
                # TODO confirm: assumes the first three columns are
                # UIN, SEND_TIME and CONTENT, matching addItem's parameters.
                addItem(content[0], content[1], content[2], spider_time, name, majia)
finally:
    cur.close()
    conn.close()
以上程式碼會讀取 C:\Users\hexu\Desktop\20160520 資料夾下所有檔案的內容,並將其插入本機的 MySQL 資料庫。其中部分程式碼使用正規表達式從檔案路徑中擷取相關內容,與本文主題無關,可以刪除。
注:檔案中每一行是一筆記錄,行與行之間以 \n 分隔;同一行內的各欄位以 \t 分隔。