天天看点

pandas模仿excel对数据处理并可视化

import pandsa as pd

def filterdata():
   df = pd.read_csv('C:/Users/Desktop/data.csv',encoding='ISO-8859-1')  #也可以用df = pd.read_csv('C:/Users/Desktop/data.csv',encoding='ISO-8859-1',index_col='CUT_STIME', parse_dates=True).sort_index()排序按index_col做索引
   # print(df.query('CUT_EQPID==198'))刷选单列数据
   df = df.astype({'CUT_STIME':'str'})  #将该列转为字符串
   df['CUT_STIME'] = df['CUT_STIME'].apply(lambda x :x.split(' ')[0])
   df['CUT_STIME'] = pd.to_datetime(df['CUT_STIME'])
   df = df.sort_values(by = "CUT_STIME",ascending = True) #排序
   # print(df[((df['CUT_EQPID']==198) | (df['CUT_EQPID']==209) | (df['CUT_EQPID']==210) | (df['CUT_EQPID']==211) | (df['CUT_EQPID']==212) | (df['CUT_EQPID']==213)) & (df['TYPE']=='EK-CAHR306B')])
   df = df[((df['CUT_EQPID']==198) | (df['CUT_EQPID']==209) | (df['CUT_EQPID']==210) | (df['CUT_EQPID']==211) | (df['CUT_EQPID']==212) | (df['CUT_EQPID']==213)) & (df['TYPE']=='EK-CAHR306B')]
   columns = list(df['CUT_STIME'].drop_duplicates())
   cutdeviate = []               #切例
   pcollapse = []                #正例
   bcollapse = []                #背例
   shuangbao = []                #双例
   aoiok = []                    #a率
   cutok = []                    #切率
   for i in range(len(columns)):
      each = df[df['CUT_STIME']==columns[i]]
      cutdeviate84 = each['BIN84'].sum()
      cutdeviate85 = each['BIN85'].sum()
      pcollapse81 = each['BIN81'].sum()
      bcollapse83 = each['BIN83'].sum()
      bcollapse92 = each['BIN92'].sum()
      shuangbao80 = each['BIN80'].sum()
      AOINGEA =  each['AOI_NGEA'].sum()
      total = each['TTLEA'].sum()
      cutdeviate.append((cutdeviate84+cutdeviate85)/total*100)
      pcollapse.append(pcollapse81/total*100)
      bcollapse.append((bcollapse83+bcollapse92)/total*100)
      shuangbao.append(shuangbao80/total*100)
      aoiok.append((total-AOINGEA)/total*100)
      cutok.append(100-cutdeviate[i]-pcollapse[i]-bcollapse[i]-shuangbao[i])
   return columns,cutdeviate,pcollapse,bcollapse,shuangbao,aoiok,cutok


def main():
   columns,cutdeviate,pcollapse,bcollapse,shuangbao,aoiok,cutok = filterdata()
   #设置数据
   data1 = cutdeviate
   data2 = pcollapse
   data3 = bcollapse
   #设置柱状图的主标题与副标题
   bar = Bar()
   #设置行名
   bar.add_xaxis(columns)
   bar.add_yaxis("切例",data1,stack=True,category_gap="50%")
   bar.add_yaxis("正例",data2,stack=True,category_gap="50%")
   bar.add_yaxis("背例",data3,stack=True,category_gap="50%") #category_gap="50%"为柱间距离
   bar.set_series_opts(label_opts=opts.LabelOpts(is_show=False),bar_category_gap = '30%')  #bar_category_gap = '30%'为柱的高度
   bar.extend_axis(yaxis=opts.AxisOpts(min_=70,max_=100,axistick_opts=opts.AxisTickOpts(is_inside=True,),axislabel_opts=opts.LabelOpts(formatter="{value}%", position="right")))                 
   # bar.set_global_opts(title_opts=opts.TitleOpts(title="321自动切割分析",subtitle="良率分析"))
   bar.set_global_opts(
         xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-8)),
         title_opts=opts.TitleOpts(title="析",subtitle="u析"),
         datazoom_opts=[opts.DataZoomOpts()],)
   bar.set_series_opts(
            label_opts=opts.LabelOpts(
                position="right",),
                formatter=JsCode("function(x){return Number(x.data.percent * 100).toFixed() + '%';}"),)
   line = Line()
   line.add_xaxis(columns)
   line.add_yaxis("A率",aoiok,yaxis_index=1)
   line.add_yaxis("切率",cutok,yaxis_index=1)
   line.set_series_opts(
         label_opts=opts.LabelOpts(
               position="right",),
               formatter=JsCode("function(x){return Number(x.data.percent * 100).toFixed() + '%';}"),)		#图中数字显示的方向
   # line.set_global_opts(yaxis_opts=opts.AxisOpts())  
   bar.overlap(line)
   bar.render()

           

实际效果如下图:

pandas模仿excel对数据处理并可视化

以下代码实现各个种类工作的时间,然后做成数据可视化,在 内网 下实现。可不用在线,但是需要去https://assets.pyecharts.org/assets/echarts.min.js复制该网站内容并且保存本地echarts.min.js,即可实现 内网 操作

import os
import datetime 
import csv
import re
import ast
import pandas as pd
from pyecharts.charts import Bar,Line,Grid,Page
from pyecharts import options as opts
from pyecharts.commons.utils import JsCode
import shutil

class PageBase(Page):
    #继承Page组合图表基类,以重新设置JS加载路径
    def __init__(self, **kwargs):
        super(PageBase, self).__init__(**kwargs)
        self.js_host = ' '
class datahandle():
    def __init__(self):
        self.path = '//127.0.0.1/ip/'
        self.newpath = 'C:/Users/Desktop/datahandle/week/'
        self.collectinfoDir = 'C:/Users/Desktop/datahandle/'
        self.srcfile = "C:/Users/Desktop/datahandle/"
        self.dstfile = r'//127.0.0.1/ip/c/'

        self.adnum = [12, 13, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24]
        # self.adnum = [213]
         
    def cleanfile(self):
        filelist=os.listdir(self.newpath)                
        for file in filelist:
            fpath = os.path.join(self.newpath,file)
            if os.path.isfile(fpath):
                os.remove(fpath)

    def filtdata(self):
        weeks = '{:%Y-%m-%d}'.format(datetime.datetime.now() - datetime.timedelta(days=8))
        weeks = datetime.datetime.strptime(weeks, "%Y-%m-%d")
        #weeke = '{:%Y-%m-%d}'.format(datetime.datetime.now() - datetime.timedelta(days=1))
        weeke = '{:%Y-%m-%d}'.format(datetime.datetime.now())
        weeke = datetime.datetime.strptime(weeke, "%Y-%m-%d")
        filelist = os.listdir(self.path)
        for file in filelist:
            shotname,_ = os.path.splitext(file)     #名字,格式
            shotname = shotname.split("AD")[0]
            shotname = datetime.datetime.strptime(shotname, "%Y-%m-%d")
            if shotname > weeks and shotname < weeke:
                srcfile = os.path.join(self.path,file)
                dstfile = os.path.join(self.newpath,file)
                shutil.copyfile(srcfile,dstfile)


    def workframe(self):
        filelist = os.listdir(self.newpath)
        df = pd.DataFrame(columns=['fpcs','pausetime','trypausetime','errtime','finishtime'],index=self.adnum)
        # print(df)
        for i in range(len(self.adnum)):
            adnum = self.adnum[i]
            dateeg = 0
            okexistloc = False
            stime = []
            pausetime = []
            err = []
            exitime = []
            trytime = []
            ftime = []
            totalinfo = []
            for file in filelist:
                shotname,_ = os.path.splitext(file)     #名字,格式
                # print(shotname)
                shotname = int(shotname.split("D")[-1])
                if shotname != adnum:
                    continue
                else:
                    dateeg += 1
                    okexistloc = True
                    filedir = os.path.join(self.path,file)
                    with open(filedir,'r',newline='',encoding='utf-8') as csvfile:
                        rows = csv.reader(csvfile)
                        for i,row in enumerate(rows):
                            # print(row)
                            matchobj1 = re.findall("start",row[3])
                            matchobj2 = re.findall("pause",row[3])
                            matchobj3 = re.findall("err",row[3])
                            matchobj4 = re.findall("exit",row[3])
                            matchobj5 = re.findall("trypause",row[3])
                            matchobj6 = re.findall("finish",row[3])
                            if matchobj1:
                                stime.append(row[1])
                            if matchobj2:
                                t = ','.join(row[4:])
                                t = ast.literal_eval(t)
                                # print()
                                pausetime.append(t['usetime'])
                            if matchobj3:
                                err.append(row[1])
                            if matchobj4:
                                exitime.append(row[1])
                            if matchobj5:
                                t = ','.join(row[4:])
                                t = ast.literal_eval(t)
                                trytime.append(t['usetime'])
                            if matchobj6:
                                ftime.append(row[1])
            if okexistloc:
                existloc = False
                stime = sorted(list(set(stime)))   
                ftime =  sorted(list(set(ftime))) 
                totalinfo.append(round(len(ftime)/dateeg,2))
                ppausetime = round(sum(pausetime)/dateeg/3600,2)  
                totalinfo.append(ppausetime)
                # print('ppausetime',ppausetime) 
                ttrytime = round(sum(trytime)/dateeg/3600,2) 
                totalinfo.append(ttrytime) 
                # print('ttrytime',ttrytime) 
                errpstime = []
                fishtime = []               #保存切割完成时间
                exitime = sorted(list(set(exitime)))  
                err = sorted(list(set(err))) 
                #err += exitime
                #err = sorted(list(set(err))) 
                # print(stime)
                # print(len(ftime),'____________',ftime)
                # print(err)
                if len(err)>0:
                    # print(len(err))
                    for j in range(len(err)):
                        errtime = err[j]
                        for n in range(len(stime)):
                            if stime[n] > errtime:
                                p1 = datetime.datetime.strptime(errtime,'%Y-%m-%d %H:%M:%S')
                                s1 = datetime.datetime.strptime(stime[n],'%Y-%m-%d %H:%M:%S')
                                errpstime.append((s1-p1).seconds)
                                # print("8888888888888")
                                break
                eerrpstime = round(sum(errpstime)/dateeg/3600,2) 
                totalinfo.append(eerrpstime) 
                # print('eerrpstime',eerrpstime)
                if len(ftime)>0:
                    # print(len(ftime))
                    for i in range(len(ftime)):
                        fftime = ftime[i]
                        if i == 0:
                            # print('@@@@@@@@@@@')
                            if fftime > stime[0]:
                                s1 = datetime.datetime.strptime(stime[0],'%Y-%m-%d %H:%M:%S')
                                f1 = datetime.datetime.strptime(fftime,'%Y-%m-%d %H:%M:%S')
                                fishtime.append((f1-s1).seconds/3600)
                        else:
                            for m in range(len(stime)):
                                if stime[m] > ftime[i-1]:
                                    if stime[m] < fftime:
                                        s2 = datetime.datetime.strptime(stime[m],'%Y-%m-%d %H:%M:%S')
                                        f2 = datetime.datetime.strptime(fftime,'%Y-%m-%d %H:%M:%S')
                                        t = (f2-s2).seconds/3600
                                        if t > 1.8:
                                            fishtime.append(t)
                                            # print(stime[m],fftime)
                                            break
                                        else:
                                            s3 = datetime.datetime.strptime(ftime[i-1],'%Y-%m-%d %H:%M:%S')
                                            f3 = datetime.datetime.strptime(fftime,'%Y-%m-%d %H:%M:%S')
                                            fishtime.append((f3-s3).seconds/3600)
                                            # print('ftime[i-1]',ftime[i-1],fftime)
                                            break
                                    else:
                                        s3 = datetime.datetime.strptime(ftime[i-1],'%Y-%m-%d %H:%M:%S')
                                        f3 = datetime.datetime.strptime(fftime,'%Y-%m-%d %H:%M:%S')
                                        fishtime.append((f3-s3).seconds/3600)
                                        # print('ftime[i-1]',ftime[i-1],fftime)
                                        break
                ffishtime = round(sum(fishtime)/dateeg,2) 
                totalinfo.append(ffishtime)
                df.loc[adnum] = totalinfo
        df.to_csv('C:/Users/Desktop/datahandle/datarecord.csv',encoding="ANSI")
        # print(df)        
        return df

    
    def workview(self):
        df = self.adok()
        # print(df)
        #设置数据
        fpcs = list(df['fpcs'])
        pausetime = list(df['pausetime'])
        trypausetime = list(df['trypausetime'])
        errtime = list(df['errtime'])
        finishtime = list(df['finishtime'])
        cuttime = [round(finishtime[i] - pausetime[i] - trypausetime[i] - errtime[i],2) for i in range(len(pausetime))]
        freetime = [round(24-finishtime[i],2) for i in range(len(finishtime))]
        #设置柱状图的主标题与副标题
        bar = Bar()
        #设置行名(总体良率)
        bar.add_xaxis(self.adnum)
        bar.add_yaxis("正时间",pausetime,stack=True,category_gap="50%")
        bar.add_yaxis("试时间",trypausetime,stack=True,category_gap="50%")
        bar.add_yaxis("err时间",errtime,stack=True,category_gap="50%")
        bar.add_yaxis("cut时间",cuttime,stack=True,category_gap="50%")
        bar.add_yaxis("free时间",freetime,stack=True,category_gap="50%")
        bar.set_series_opts(label_opts=opts.LabelOpts(is_show=False),bar_category_gap = '30%')
        bar.extend_axis(yaxis=opts.AxisOpts(min_=0,max_=10,axistick_opts=opts.AxisTickOpts(is_inside=True,),axislabel_opts=opts.LabelOpts(formatter="{value}pcs", position="right")))           
        bar.set_global_opts(
                xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=0)),
                yaxis_opts=opts.AxisOpts(max_=24,axislabel_opts=opts.LabelOpts(formatter="{value}h")),
                title_opts=opts.TitleOpts(title="作业分析",subtitle="week"),
                datazoom_opts=[opts.DataZoomOpts(range_start=0,range_end=100)],)    #设置滚动条的位置,和长度范围
        bar.set_series_opts(
                label_opts=opts.LabelOpts(
                    position="right",),
                    formatter=JsCode("function(x){return Number(x.data.percent * 100).toFixed() + '%';}"),)
        ssline = Line()
        ssline.add_xaxis([i for i in range(len(self.adnum))])
        ssline.add_yaxis("作业时间",finishtime,is_connect_nones=True)
        ssline.add_yaxis("pcs",fpcs,yaxis_index=1,is_connect_nones=True)
        ssline.set_series_opts(
                label_opts=opts.LabelOpts(
                    position="right",),
                    formatter=JsCode("function(x){return Number(x.data.percent * 100).toFixed() + '%';}"),)
        bar.overlap(ssline)
        return bar




    def productionok(self,df,daylist,adnumlist):
        filelist = os.listdir(self.newpath)
        for file in filelist:
            fishtime = []
            shotname,_ = os.path.splitext(file)
            day = shotname.split("AD")[0]
            adnum = shotname.split("AD")[-1]
            filedir = os.path.join(self.path,file)
            with open(filedir,'r',newline='',encoding='utf-8') as csvfile:
                rows = csv.reader(csvfile)
                for row in rows:
                    matchobj = re.findall("finish",row[3])
                    if matchobj:
                        fishtime.append(row[1])
            # fishtime = sorted(list(set(fishtime)))
            for i in range(len(fishtime)):
                for j in range(len(fishtime)-i-1):
                    f = datetime.datetime.strptime(fishtime[i],'%Y-%m-%d %H:%M:%S')
                    s = datetime.datetime.strptime(fishtime[j],'%Y-%m-%d %H:%M:%S')
                    if (f-s).seconds/3600 > 1.5:
                        fishtime[i],fishtime[j] = fishtime[j],fishtime[i]
                    elif (f-s).seconds/3600 > 0:
                        del fishtime[j]
            df.loc[day,adnum] = len(fishtime)
        #设置数据
        lline = Line()
        lline.add_xaxis(daylist)
        lline.set_global_opts(
                xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=0)),
                yaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(formatter="{value}/pcs")),
                title_opts=opts.TitleOpts(title="pcs"),
                datazoom_opts=[opts.DataZoomOpts(range_start=0,range_end=100)],)
        lline.set_series_opts(
                label_opts=opts.LabelOpts(
                    position="right",),
                    formatter=JsCode("function(x){return Number(x.data.percent * 100).toFixed() + '%';}"),)
        for i in range(len(adnumlist)):
            pcs = list(df[adnumlist[i]])
            lline.add_yaxis(adnumlist[i],pcs, is_connect_nones=True)
        #lline.render("pcs.html")
        #设置数据
        line = Line()
        line.add_xaxis(daylist)
        line.set_global_opts(
                xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=0)),
                yaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(formatter="{value}pcs/h")),
                title_opts=opts.TitleOpts(title="生产力"),
                datazoom_opts=[opts.DataZoomOpts(range_start=0,range_end=100)],)
        line.set_series_opts(
                label_opts=opts.LabelOpts(
                    position="right",),
                    formatter=JsCode("function(x){return Number(x.data.percent * 100).toFixed() + '%';}"),)
        for i in range(len(adnumlist)):
            pcs1 = list(df[adnumlist[i]].values/24)
            ppcs1 = []
            for j in range(len(pcs1)):
                ppcs1.append(round(pcs1[j],2))
            line.add_yaxis(adnumlist[i],ppcs1, is_connect_nones=True)
        df.to_csv('C:/Users/Desktop/datahandle/pcs.csv',encoding="ANSI")
        return lline,line,df
    
    def createtable(self):
        daylist = []
        adnumlist = []
        filelist = os.listdir(self.newpath)
        for file in filelist:
            shotname,_ = os.path.splitext(file)
            day = shotname.split("AD")[0]
            daylist.append(day)
            adnum = shotname.split("AD")[-1]
            adnumlist.append(adnum)
        daylist = sorted(list(set(daylist)))
        adnumlist = sorted(list(set(adnumlist)))
        df = pd.DataFrame(columns=adnumlist,index=daylist)
        return df,daylist,adnumlist




    def main(self):
        self.cleanfile()
        self.filtdata()
        bar = self.dataview()
        df,daylist,adnumlist = self.createtable()
        lline,line,dfpcs = self.productionok(df,daylist,adnumlist)
        page = PageBase()
        page.add(bar,lline,line)
        workname = "C:/Users/Desktop/datahandle/{:%Y-%m-%d}machine_efficiency.html".format(datetime.datetime.now())
        workname1= "{:%Y-%m-%d}machine_efficiency.html".format(datetime.datetime.now())
        page.render(workname)
        self.srcfile = os.path.join(self.srcfile,workname1)
        self.dstfile = os.path.join(self.dstfile,workname1)
        shutil.copyfile(self.srcfile,self.dstfile)
        


if __name__ == '__main__':
    app = datahandle()
    # app.adok()
    # app.dataview()
    # app.productionok()
    app.main()
    # app.workeffect()

           

继续阅读