天天看點

Pandas基礎學習:Series、DataFrame、篩選資料、指派及操作、處理空值、concat資料合併、merge合併

Pandas基礎

  • Series
  • DataFrame
  • 篩選資料
  • 指派及操作
  • 處理空值
  • concat資料合并
    • 外接 内接
  • merge合并
    • 外連接 內連接 左連接 右連接

Series

import pandas as pd

# A Series built from a bare list gets the default integer index.
s1 = pd.Series(data=[4, -7, -5, 3])
# Show the whole Series, its raw values, its index, then a separator line.
for _shown in (s1, s1.values, s1.index, "********"):
    print(_shown)

# The same constructor with an explicit label for every value.
s2 = pd.Series(data=[4.0, 6.5, -0.5, 4.2], index=['d', 'b', 'a', 'c'])
_label_order = ['a', 'b', 'c', 'd']
print(s2)
print(s2['a'])          # look up a single value by its label
print(s2[_label_order])  # look up several labels; result follows the requested order
print("********")

# A Series behaves much like a fixed-length ordered dict, so it can be
# built straight from one: keys become the index, values become the data.
dic1 = dict(apple=5, pen=3, applepen=10)
s3 = pd.Series(dic1)
print(s3)
           
Pandas基礎學習SeriesDataFrame篩選資料指派及操作處理空值concat資料合并merge合并

DataFrame

# Column-oriented input: each key becomes a column of the DataFrame.
data = dict(
    year=[2014, 2015, 2016, 2017],
    income=[10000, 30000, 50000, 80000],
    pay=[5000, 20000, 30000, 3000],
)
df1 = pd.DataFrame(data)

# Print a handful of basic attributes, each followed by a separator line:
# column labels, row labels, the raw value array, summary statistics,
# and finally the transposed frame.
print(
    df1.columns,
    "********",
    df1.index,
    "********",
    df1.values,
    "********",
    df1.describe(),
    "********",
    df1.T,
    sep="\n",
)
           
Pandas基礎學習SeriesDataFrame篩選資料指派及操作處理空值concat資料合并merge合并
Pandas基礎學習SeriesDataFrame篩選資料指派及操作處理空值concat資料合并merge合并
import pandas as pd
import numpy as np

# A 3x4 frame with default row and column labels.
df2 = pd.DataFrame(np.arange(12).reshape(3, 4))
# The same values with deliberately unsorted row and column labels.
df3 = pd.DataFrame(
    np.arange(12).reshape(3, 4),
    index=['a', 'c', 'b'],
    columns=[2, 33, 44, 5],
)
df4 = df3.sort_index(axis='columns')  # order the columns by their labels
df5 = df3.sort_index(axis='index')    # order the rows by their labels
df6 = df3.sort_values(44)             # order the rows by the values in column 44
           
Pandas基礎學習SeriesDataFrame篩選資料指派及操作處理空值concat資料合并merge合并

篩選資料

import pandas as pd
import numpy as np

# Five consecutive days starting 2017-01-01 serve as the row labels.
dates = pd.date_range('20170101', periods=5)
df1 = pd.DataFrame(
    np.arange(15).reshape(5, 3),
    index=dates,
    columns=['A', 'B', 'C'],
)

_views = [
    # Plain [] indexing.
    df1['A'],                      # one column as a Series (same as df1.A)
    df1[0:2],                      # rows at positions 0 and 1
    df1['20170102':'20170104'],    # rows between two date labels, both ends included
    # Label-based selection with .loc.
    df1.loc['20170102'],           # a single row by its date label
    df1.loc['20170101', ['A', 'C']],  # one row, two named columns
    df1.loc[:, ['A', 'B']],        # every row, two named columns
    # Position-based selection with .iloc.
    df1.iloc[2],                   # row at position 2, i.e. the third row
    df1.iloc[1:3, 2:3],            # rows 1-2, column 2
    df1.iloc[[1, 2, 4], [1, 2]],   # hand-picked row and column positions
]
# Print every selection with a separator line between consecutive ones.
print(*_views, sep="\n********\n")
           
Pandas基礎學習SeriesDataFrame篩選資料指派及操作處理空值concat資料合并merge合并
Pandas基礎學習SeriesDataFrame篩選資料指派及操作處理空值concat資料合并merge合并

指派及操作

import pandas as pd
import numpy as np

# Row labels are plain integers that merely look like YYYYMMDD dates.
dates = np.arange(20170101,20170107)
df1 = pd.DataFrame(np.arange(24).reshape((6,4)),index=dates,columns=['A','B','C','D'])

df1.iloc[2,2] = 100 # set one cell by position (third row, third column)
df1.loc[20170102,'B'] = 200 # set one cell by row/column label
df1[df1.A>10] = 0 # zero out every row whose A value is greater than 10
# Select rows and column in a single .loc call. The chained form
# df1.A[df1.A==0] = 1 assigns into an intermediate Series: it raises
# SettingWithCopyWarning on older pandas and silently leaves df1 unchanged
# once Copy-on-Write is active.
df1.loc[df1.A==0,'A'] = 1
df1['E'] = 10 # add a column filled with a constant
df1['F'] = pd.Series([1,2,3,4,5,6],index=dates) # add a column aligned on the row labels
df1.loc[20170107,['A','B','C']] = [1,2,3] # add a row; the columns not listed become NaN
           
Pandas基礎學習SeriesDataFrame篩選資料指派及操作處理空值concat資料合并merge合并
# NOTE: this snippet continues from the previous one and expects df1 to
# already exist with columns A-F and integer date-like row labels.
s1 = pd.Series([1,2,3,4,5,6],index=['A','B','C','D','E','F'])
s1.name = 'S1'
# DataFrame.append was removed in pandas 2.0. Turning the Series into a
# one-row frame (its name becomes the row label) and concatenating adds the
# same 'S1' row.
df2 = pd.concat([df1, s1.to_frame().T])
df1.insert(1,'G',df2['E']) # insert df2's E column into df1 at position 1 under the name G

g = df1.pop('G') # remove column G from df1 and keep it as a Series
df1.insert(6,'G',g) # put it back at position 6 (the end, assuming six columns remain)

del df1['G'] # delete column G
df2 = df1.drop(['A','B'],axis=1) # new frame without columns A and B (axis=1 means columns)
df2 = df1.drop([20170101,20170102],axis=0) # new frame without those two rows (axis=0 means rows)
           
Pandas基礎學習SeriesDataFrame篩選資料指派及操作處理空值concat資料合并merge合并

處理空值

# axis selects what is examined: 0 means rows, 1 means columns.
# how is 'any' (at least one missing value) or 'all' (every value missing).
# None of the results below is assigned, so df2 itself is left unchanged.
df2.dropna(axis=0,how='any') # drop every row that contains a missing value
df2.dropna(axis=1,how='any') # drop every column that contains a missing value
df2.fillna(value=0) # replace missing values with 0
df2.isnull() # boolean mask marking the missing values
np.any(df2.isnull()) # true as soon as one or more values are missing
np.all(df2.isnull()) # true only when every value is missing
           

concat資料合并

# Three 3x4 frames sharing the columns a-d, holding 0-11, 12-23 and 24-35.
df1, df2, df3 = (
    pd.DataFrame(np.arange(_lo, _lo + 12).reshape(3, 4), columns=['a', 'b', 'c', 'd'])
    for _lo in (0, 12, 24)
)
_frames = [df1, df2, df3]

df4 = pd.concat(_frames, axis='index')                     # stack vertically, keep each frame's row labels
df5 = pd.concat(_frames, axis='index', ignore_index=True)  # stack vertically, renumber the rows from 0
df6 = pd.concat(_frames, axis='columns')                   # place the frames side by side
           
Pandas基礎學習SeriesDataFrame篩選資料指派及操作處理空值concat資料合并merge合并

外接 内接

# Two frames that share only the columns 'a' and 'c'.
df1 = pd.DataFrame(
    np.arange(12).reshape(3, 4),
    columns=['a', 'b', 'c', 'f'],
)
df2 = pd.DataFrame(
    np.arange(12, 24).reshape(3, 4),
    columns=['a', 'c', 'd', 'e'],
)
_both = [df1, df2]

# Outer join: keep the union of the columns and fill the gaps with NaN.
df3 = pd.concat(_both, ignore_index=True, join='outer')
# Inner join: keep only the columns present in both frames.
df4 = pd.concat(_both, ignore_index=True, join='inner')
           
Pandas基礎學習SeriesDataFrame篩選資料指派及操作處理空值concat資料合并merge合并
# df1 has three rows, df2 has four, so their row labels only partly overlap.
df1 = pd.DataFrame(np.arange(12).reshape((3,4)),columns=['a','b','c','f'])
df2 = pd.DataFrame(np.arange(12,24).reshape((4,3)),columns=['a','c','d'])
# Side-by-side concat restricted to df1's row labels. The join_axes keyword
# was removed in pandas 1.0; reindexing df2 to df1.index first gives the
# same result.
df3 = pd.concat([df1,df2.reindex(df1.index)],axis=1)
# Plain side-by-side concat: all row labels are kept and gaps become NaN.
df4 = pd.concat([df1,df2],axis=1)
           
Pandas基礎學習SeriesDataFrame篩選資料指派及操作處理空值concat資料合并merge合并

merge合并

# Two frames that share a 'key' column holding the same four keys.
df_left = pd.DataFrame(dict(
    key=['K0', 'K1', 'K2', 'K3'],
    A=['A0', 'A1', 'A2', 'A3'],
    B=['B0', 'B1', 'B2', 'B3'],
))
df_right = pd.DataFrame(dict(
    key=['K0', 'K1', 'K2', 'K3'],
    C=['C0', 'C1', 'C2', 'C3'],
    D=['D0', 'D1', 'D2', 'D3'],
))
# Match rows on 'key' and place the remaining columns side by side.
df = df_left.merge(df_right, on='key')
           
Pandas基礎學習SeriesDataFrame篩選資料指派及操作處理空值concat資料合并merge合并

外連接 內連接 左連接 右連接

# Two frames keyed by the column pair (key1, key2); only some pairs match.
df1 = pd.DataFrame({
    'key1': ['K0', 'K0', 'K1', 'K2'],
    'key2': ['K0', 'K1', 'K0', 'K1'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
})
df2 = pd.DataFrame({
    'key1': ['K0', 'K1', 'K1', 'K3'],
    'key2': ['K0', 'K0', 'K0', 'K0'],
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3'],
})
_join_keys = ['key1', 'key2']

# how chooses the join type; it defaults to 'inner' and accepts
# 'left', 'right', 'inner' or 'outer'.
df_outer, df_inner, df_left, df_right = (
    df1.merge(df2, how=_mode, on=_join_keys)
    for _mode in ('outer', 'inner', 'left', 'right')
)

# indicator=True adds a '_merge' column saying which side each row came from.
df_outer1 = df1.merge(df2, how='outer', on=_join_keys, indicator=True)
# Passing a string instead names that column 'indicator_column'.
df_outer2 = df1.merge(df2, how='outer', on=_join_keys, indicator='indicator_column')
           
Pandas基礎學習SeriesDataFrame篩選資料指派及操作處理空值concat資料合并merge合并
# Frames whose row labels, rather than a column, carry the join keys.
df1 = pd.DataFrame(
    {'A': ['A0', 'A1', 'A2'], 'B': ['B0', 'B1', 'B2']},
    index=['K0', 'K1', 'K2'],
)
df2 = pd.DataFrame(
    {'C': ['C0', 'C2', 'C3'], 'D': ['D0', 'D2', 'D3']},
    index=['K0', 'K2', 'K3'],
)
# Outer join on the row labels of both sides.
df = df1.merge(df2, how='outer', left_index=True, right_index=True)

df_boys = pd.DataFrame({'k': ['K0', 'K1', 'K2'], 'age': [1, 2, 3]})
df_girls = pd.DataFrame({'k': ['K0', 'K0', 'K3'], 'age': [4, 5, 6]})
# Both sides have an 'age' column; the suffixes tell the left one from the right one.
df_all = df_boys.merge(df_girls, how='outer', on='k', suffixes=('_boy', '_girl'))
           
Pandas基礎學習SeriesDataFrame篩選資料指派及操作處理空值concat資料合并merge合并