天天看点

python读取本地文件夹下所有文件并插入数据库

#_*_ coding:UTF-8 _*_
import sys,io,os,re
import MySQLdb
def VisitDir(path):
    """Return the full path of every file found beneath *path*.

    The tree is traversed with ``os.walk`` and each file name is joined to
    the directory it was found in. Directories themselves are not listed,
    and the result keeps the order in which ``os.walk`` yields the files.
    """
    return [
        os.path.join(folder, filename)
        for folder, _subdirs, filenames in os.walk(path)
        for filename in filenames
    ]

def addItem(uci,talk_time,content,spider_time,name,majia):
    """Insert one row into the ``qq`` table and commit it.

    The arguments are bound, in order, to the columns UIN, SEND_TIME,
    CONTENT, SPIDER_TIME, GROUP_UIN and MAJIA through a parameterized
    query, so the values are never spliced into the SQL text.

    Relies on the module-level ``cur`` (cursor) and ``conn`` (connection)
    being set up before the first call.

    Insertion is best-effort: any error raised while executing or
    committing is printed and the function returns normally, so one bad
    row does not abort the whole import.
    """
    sql='insert into qq(UIN,SEND_TIME,CONTENT,SPIDER_TIME,GROUP_UIN,MAJIA) values(%s,%s,%s,%s,%s,%s)'
    value=[uci,talk_time,content,spider_time,name,majia]
    try:
        cur.execute(sql,value)
        conn.commit()
    except Exception as e:
        # The original wrapped this in a second handler that called
        # sys.exit() on MySQLdb.Error, but that handler could never run
        # because this one already catches everything. Only the behaviour
        # that was actually reachable (report and carry on) is kept.
        print(e)
# Root folder whose files are imported. The 8 digits that follow
# "Desktop\" in each file path are stored in the SPIDER_TIME column.
path=r"C:\Users\hexu\Desktop\20160520"
pathall=VisitDir(path)

# NOTE(review): the port number was missing from the published listing;
# 3306 is the MySQL default -- confirm it matches your server.
conn=MySQLdb.connect(host='localhost',user='root',passwd='123456',db='test',port=3306)
conn.set_character_set('utf8')
# addItem() reads the module-level names `cur` and `conn`, so both must
# stay global. One cursor is shared by every file instead of creating a
# new one per file and closing only the last.
cur=conn.cursor()

# Compiled once, outside the loop. Each pattern pulls one field out of the
# file path.
majia_pattern=re.compile(r'\d*(?=\\mr)')
spider_time_pattern=re.compile(r'(?<=Desktop\\)\d{8}')
name_pattern=re.compile(r'(?<=mr_troop_).*')

try:
    # Every file is visited, as the accompanying text describes; the loop
    # bounds in the published listing had lost their numbers.
    for filepath in pathall:
        print(filepath)
        majia_match=majia_pattern.search(filepath)
        spider_time_match=spider_time_pattern.search(filepath)
        name_match=name_pattern.search(filepath)
        if majia_match is None or spider_time_match is None or name_match is None:
            # A path that does not follow the expected layout used to
            # crash with AttributeError on .group(); skip it instead.
            print("skipping file with unexpected path: %s" % filepath)
            continue
        majia=majia_match.group()
        spider_time=spider_time_match.group()
        name=name_match.group()

        # `with` closes every file, not just the last one opened.
        with open(filepath, "r") as f:
            for line in f:
                # Drop the line terminator so it is not stored as part of
                # the last column.
                content=line.rstrip("\n").split("\t")
                if len(content) < 3:
                    # Blank or truncated line: nothing usable to insert.
                    print("skipping malformed line in %s" % filepath)
                    continue
                # NOTE(review): the column indices were missing from the
                # published listing; the first three tab-separated fields
                # are assumed to be UIN, SEND_TIME and CONTENT -- confirm
                # against the real file layout.
                addItem(content[0],content[1],content[2],spider_time,name,majia)
finally:
    # Release the database resources even if the import stops early or
    # the folder contained no files.
    cur.close()
    conn.close()
           

该代码读取 C:\Users\hexu\Desktop\20160520 文件夹下所有文件的内容,并将其插入本地的 MySQL 数据库。其中用正则表达式从文件路径中提取相关字段的那部分代码与本文主题无关,可以删除。

注:文件中的列用 \t(制表符)分隔,行用 \n(换行符)分隔。