1什么是pandas？
答：pandas是一个开源的python类库，用于数据分析、数据库和数据可视化
2如何安装pandas:
pip install pandas
3pandas数据读取csv文件：
import pandas as pd

# Location of the sample CSV file that is loaded below.
fpath = r'E:/test/resultcsv.csv'

# Load the CSV file into a DataFrame.
ratings = pd.read_csv(fpath)

# Print, in this order: the first few rows, the (rows, columns) shape,
# the column labels, the row index, and the dtype of every column.
for overview in (
    ratings.head(),
    ratings.shape,
    ratings.columns,
    ratings.index,
    ratings.dtypes,
):
    print(overview)
4读取mysql文件:
import pandas as pd
import pymysql

# NOTE(review): the credentials are hard-coded for the demo; move them to
# environment variables or a config file before using this anywhere else.
conn = pymysql.connect(
    host='127.0.0.1',
    user='root',
    password='123456',
    database='wl2020v2',
    charset='utf8mb4',
)
try:
    # Run the query and load the whole result set into a DataFrame.
    mysql_page = pd.read_sql(
        "select * from exp_order where create_time between '2021-11-03 09:20:00' AND '2021-11-03 10:30:00'",
        con=conn,
    )
finally:
    # Always release the connection, even when the query raises.
    conn.close()
print(mysql_page)
5pandas两大数据结构对象dataframe和series的关系：
一列或者一行数据就是一个series，由2个以上series组成的数据就是一个dataframe，而series+dataframe=pandas
练习：
1）把字典转换成dataframe对象：
import pandas as pd

# Column-oriented sample data: each key becomes a column label and each list
# supplies that column's values, one entry per row.
dit = {}
dit['è¿åç¼å·'] = ['550019093732123', '550019093732456', '550019093732789']
dit['å件人å§å'] = ['å¼ ä¸', 'æå', 'çäº']
dit['è´¦åéé¢'] = [6.4, 6.4, 8]

# Build the DataFrame from the dictionary.
df = pd.DataFrame(data=dit)
2）查询数据：
# --- Selecting columns ---
# A single column label selects that one column.
waybill_column = df['è¿åç¼å·']
print(waybill_column)
# A list of column labels selects several columns at once.
selected_columns = ['è¿åç¼å·', 'è´¦åéé¢']
print(df[selected_columns])
# --- Selecting rows ---
# A single index label selects one row; with the default 0-based index,
# label 1 is the second row.
print(df.loc[1])
# A label slice selects several rows; .loc slices include both end labels.
print(df.loc[1:3])
3）可通过type查看数据类型：
# Selecting a slice of rows yields a DataFrame.
print(type(df.loc[1:3]))
# Output: <class 'pandas.core.frame.DataFrame'>
# Selecting a single row yields a Series.
print(type(df.loc[1]))
# Output: <class 'pandas.core.series.Series'>
6pandas使用df.loc查询数据的五种方法：
1使用单个label值查询数据
# Look up fields of one waybill by using its number as the row label.
import pandas as pd

# Read the waybill-number column as text. All-digit waybill numbers would
# otherwise be parsed as integers, and the string labels used below would
# then raise KeyError.
df = pd.read_csv(r'E:/test/resultcsv.csv', dtype={'è¿åç¼å·': str})
# Use the waybill number as the index so rows can be looked up by it.
df.set_index('è¿åç¼å·', inplace=True)
# One row label + one column label: the billed-amount field of that waybill.
print(df.loc['550019087312343', 'è´¦åéé¢'])
# One row label + a list of column labels: the name and phone fields.
print(df.loc['550019087312343', ['å件人å§å', 'å件人çµè¯']])
2使用值列表批量查询
# Look up several waybills at once by passing a list of row labels.
import pandas as pd

# Read the waybill-number column as text. All-digit waybill numbers would
# otherwise be parsed as integers, and the string labels used below would
# then raise KeyError.
df = pd.read_csv(r'E:/test/resultcsv.csv', dtype={'è¿åç¼å·': str})
# Use the waybill number as the index so rows can be looked up by it.
df.set_index('è¿åç¼å·', inplace=True)
# A list of row labels + one column label: the phone field of three waybills.
print(df.loc[['550019021234167', '550011234113305', '550123421554844'], 'å件人çµè¯'])
# A list of row labels + a list of column labels: name and phone fields.
print(df.loc[['550123426415167', '550123427113305', '550019012344844'], ['å件人å§å', 'å件人çµè¯']])
3使用数值区间进行范围查询
import pandas as pd

# Read the waybill-number column as text so the string label used in the last
# lookup matches the index (all-digit values would otherwise become integers).
df = pd.read_csv(r'E:/test/resultcsv.csv', dtype={'è¿åç¼å·': str})
# Slice rows and columns in one call. read_csv creates an integer index by
# default, so the row bounds must be integers (string bounds raise TypeError).
# Both ends of a .loc slice are included.
print(df.loc[1:15, 'å件人å§å':'å件人çµè¯'])
# Row-range query: the waybill numbers of the rows labelled 0 through 2000.
print(df.loc[0:2000, 'è¿åç¼å·'])
# Column-range query: for one waybill, every column from the name column
# through the phone column.
df.set_index('è¿åç¼å·', inplace=True)
print(df.loc['550019012345167', 'å件人å§å':'å件人çµè¯'])
4使用条件表达式查询
import pandas as pd

df = pd.read_csv(r'E:/test/resultcsv.csv')

# Rows whose billed amount is greater than 10; the trailing ":" keeps every
# column.
over_ten = df['è´¦åéé¢'] > 10
print(df.loc[over_ten, :])

# Combine several conditions with "&": billed amount at most 45 and at least
# 17, second column greater than 1, third column equal to 0.
amount = df['è´¦åéé¢']
amount_in_range = (amount <= 45) & (amount >= 17)
above_one = df['éé'] > 1
is_zero = df['ä¿ä»·'] == 0
a = df.loc[amount_in_range & above_one & is_zero, :]
print(a)
5调用函数查询
import pandas as pd

df = pd.read_csv(r'E:/test/resultcsv.csv')

# .loc also accepts a callable as the row selector; here an inline lambda
# builds the boolean row mask from the frame it is given.
b = df.loc[
    lambda frame: (
        (frame['è´¦åéé¢'] <= 45)
        & (frame['è´¦åéé¢'] >= 17)
        & (frame['éé'] > 1)
        & (frame['ä¿ä»·'] == 0)
    ),
    :,
]
print(b)
# Variant that uses a user-defined function as the row selector.
import pandas as pd

# Reload the CSV so this example starts from an unmodified DataFrame.
df = pd.read_csv(filepath_or_buffer=r'E:/test/resultcsv.csv')
def query_my_data(df):
    """Return a boolean row mask suitable for ``df.loc``.

    A row is selected when its 'è´¦åéé¢' value lies between 17 and 45
    (both inclusive), its 'éé' value is greater than 1 and its 'ä¿ä»·'
    value equals 0.

    Args:
        df: DataFrame containing the three columns named above.

    Returns:
        A boolean Series aligned with ``df``'s index.

    Raises:
        KeyError: if one of the three columns is missing.
    """
    return (
        (df['è´¦åéé¢'] <= 45)
        & (df['è´¦åéé¢'] >= 17)
        & (df['éé'] > 1)
        & (df['ä¿ä»·'] == 0)
    )
# Pass the function object itself (not its result) as the row selector;
# the trailing ":" keeps every column.
row_filter = query_my_data
a = df.loc[row_filter, :]
print(a)