1. DataFrame 經 groupby(...).size() 後，會得到以 MultiIndex 為索引的 Series
# Count the rows for each (SK_ID_CURR, CREDIT_ACTIVE) pair; size() yields a
# Series whose index is a two-level MultiIndex (see the output below).
group=df_tmp.groupby(['SK_ID_CURR','CREDIT_ACTIVE']).size()
group
SK_ID_CURR CREDIT_ACTIVE
162297 Active 1
Closed 2
215354 Active 6
Closed 1
dtype: int64
group.index
MultiIndex(levels=[[162297, 215354], ['Active', 'Closed']],
labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
names=['SK_ID_CURR', 'CREDIT_ACTIVE'])
# Walk the distinct SK_ID_CURR values stored in the first index level.
# The loop variable is named sk_id so the builtin id() is not shadowed.
for sk_id in group.index.levels[0]:
    print(sk_id)
162297
215354
# group.index.get_loc((215354, "Active")) returns the position of the index
# entry (215354, "Active"); that position is then used to read the matching
# element out of group.values.
group.values[group.index.get_loc((215354,"Active"))]
6
2. 衍生出各個 id 的 Active 數量，程式碼如下：
# Derived feature: number of "Active" bureau records per SK_ID_CURR.
# Start from a new DataFrame holding one row per distinct SK_ID_CURR.
df_new = pd.DataFrame({'SK_ID_CURR': list(set(df_bureau.SK_ID_CURR))})
group = df_bureau.groupby(['SK_ID_CURR', 'CREDIT_ACTIVE']).size()

# Keep only the "Active" entries and drop that index level, leaving a Series
# of counts indexed by SK_ID_CURR alone.
is_active = group.index.get_level_values('CREDIT_ACTIVE') == 'Active'
active_counts = group[is_active].reset_index(level='CREDIT_ACTIVE', drop=True)

# Assign all counts in one vectorized step instead of looping over ids:
#  - ids with no "Active" record map to NaN and are filled with 0 (the
#    per-id get_loc lookup raised KeyError for them);
#  - the column is assigned directly on df_new, avoiding the chained
#    df_new[col].loc[mask] = ... form, which may write to a temporary copy.
df_new['CREDIT_ACTIVE_COUNT'] = (
    df_new['SK_ID_CURR'].map(active_counts).fillna(0).astype('int64')
)