天天看點

python讀取本地檔案夾下所有檔案并插入資料庫

#_*_ coding:UTF-8 _*_
import sys,io,os,re
import MySQLdb
def VisitDir(path):
  """Return the full path of every file found under ``path``, recursively.

  Paths are produced in ``os.walk`` order (top-down), each built by joining
  the directory being visited with the file name. Directories themselves
  are not included. A ``path`` that yields nothing from ``os.walk`` gives
  an empty list.
  """
  return [
      os.path.join(folder, filename)
      for folder, _subdirs, filenames in os.walk(path)
      for filename in filenames
  ]

def addItem(uci,talk_time,content,spider_time,name,majia):
  """Insert one row into the ``qq`` table and commit it immediately.

  The arguments map, in order, onto the columns UIN, SEND_TIME, CONTENT,
  SPIDER_TIME, GROUP_UIN and MAJIA. Values are passed to the driver as
  query parameters, never interpolated into the SQL text.

  Uses the module-level ``cur`` (cursor) and ``conn`` (connection), which
  must exist before this function is called.

  The insert is best-effort: any exception raised by ``execute`` or
  ``commit`` is printed and swallowed so that one bad row does not abort
  the caller's loop. Nothing is returned.
  """
  sql='insert into qq(UIN,SEND_TIME,CONTENT,SPIDER_TIME,GROUP_UIN,MAJIA) values(%s,%s,%s,%s,%s,%s)'
  value=[uci,talk_time,content,spider_time,name,majia]
  try:
    cur.execute(sql,value)
    conn.commit()
  except Exception as e:
    # The original wrapped this in a second ``except MySQLdb.Error`` that
    # called sys.exit(); it could never fire because this handler already
    # catches every driver error. The effective behaviour (report, then
    # continue) is kept. ``as`` / ``print(...)`` are valid on Python 2.6+
    # and Python 3 alike.
    print(e)
# Import every file under ``path`` into the local MySQL table ``qq``.
#
# NOTE(review): the published listing had lost its bare numeric literals
# (``port=``, ``range(,len(pathall)-)``, ``content[]``). The values used
# below (port 3306, fields 0/1/2, every file processed) are reconstructed
# from context -- confirm against the real data before running.
path=r"C:\Users\hexu\Desktop\20160520"
pathall=VisitDir(path)

# 3306 is the MySQL default port.
conn=MySQLdb.connect(host='localhost',user='root',passwd='123456',db='test',port=3306)
# Connection-wide settings and the cursor are created once, not per file.
# ``cur`` and ``conn`` must stay module-level: addItem() reads them.
conn.set_character_set('utf8')
cur=conn.cursor()

# Pieces of metadata encoded in each file's path; compiled once up front.
majia_re=re.compile(r'\d*(?=\\mr)')                  # digits right before "\mr"
spider_time_re=re.compile(r'(?<=Desktop\\)\d{8}')    # 8 digits after "Desktop\"
name_re=re.compile(r'(?<=mr_troop_).*')              # everything after "mr_troop_"

try:
    # Iterate the paths directly so that no file (including the last one)
    # is skipped and an empty directory is handled without error.
    for filepath in pathall:
        print(filepath)
        majia_match=majia_re.search(filepath)
        spider_time_match=spider_time_re.search(filepath)
        name_match=name_re.search(filepath)
        if not (majia_match and spider_time_match and name_match):
            # A path that lacks the expected pieces used to crash with
            # AttributeError on ``None.group()``; report it and move on.
            print("skipping (unexpected path layout): " + filepath)
            continue
        majia=majia_match.group()
        spider_time=spider_time_match.group()
        name=name_match.group()

        # ``with`` closes every file, not just the last one opened.
        with open(filepath, "r") as f:
            for line in f:
                # Drop the line terminator so it is not stored as part of
                # the last field, then split the tab-separated columns.
                content=line.rstrip("\n").split("\t")
                if len(content) < 3:
                    # Blank or malformed line: not enough columns to insert.
                    continue
                addItem(content[0],content[1],content[2],spider_time,name,majia)
finally:
    # Release database resources even if the import is interrupted.
    cur.close()
    conn.close()
           

該程式碼會讀取 C:\Users\hexu\Desktop\20160520 資料夾下所有檔案的內容,並插入到本機的 MySQL 資料庫。其中有部分程式碼是用正規表達式從檔案路徑中提取相關資訊,和本文的主題無關,可以刪除。

注:檔案中每一行(以 \n 分隔)為一筆記錄,行內各欄位以 \t 分隔。