https://blog.csdn.net/cxlhuihui/article/details/80006441 1.ECharts可做出更精美的圖檔,matplotlib和seaborn可以對資料進行簡單的可視化操作
2.Series或DataFrame轉化成array可使用.values屬性,在可視化過程中盡量用np數組的形式
3.%matplotlib inline 圖是嵌入在notebook裡面的不是跳出來一個框的形式
4.matplotlib.pyplot是matplotlib中用於作圖的子模組
5.x = np.arange(0., 10, 0.2),在0到10之間(不含10)每隔0.2取一個數
6.numpy具有broadcast(廣播)的特性
7.plt.rcParams["figure.figsize"] = (12,8)設定圖的尺寸
8.plt.plot(x, y1, color='blue', linewidth=1.5, linestyle='-', marker='.', label=r'$y = cos{x}$')作圖的輪廓,其中label是取名作用,在這裡可以不指定,下面區分圖例使用
9.完整的繪圖
# Load packages.
import numpy as np
import matplotlib.pyplot as plt
from pylab import *  # NOTE: wildcard import kept only so code relying on pylab names keeps working

# Sample data: x runs from 0 up to (but not including) 10 in steps of 0.2.
x = np.arange(0., 10, 0.2)
y1 = np.cos(x)
y2 = np.sin(x)
y3 = np.sqrt(x)

# Draw the three curves; the labels are picked up by the legend below.
plt.plot(x, y1, color='blue', linewidth=1.5, linestyle='-', marker='.', label=r'$y = cos{x}$')
plt.plot(x, y2, color='green', linewidth=1.5, linestyle='-', marker='*', label=r'$y = sin{x}$')
plt.plot(x, y3, color='m', linewidth=1.5, linestyle='-', marker='x', label=r'$y = \sqrt{x}$')

# Work on the axes the curves were just drawn on (rather than asking for a subplot again).
ax = plt.gca()
ax.spines['right'].set_color('none')  # hide the right border
ax.spines['top'].set_color('none')  # hide the top border
# Move the bottom border to y = 0 so that it acts as the x axis.
ax.xaxis.set_ticks_position('bottom')
ax.spines['bottom'].set_position(('data', 0))
# Move the left border to x = 0 so that it acts as the y axis.
ax.yaxis.set_ticks_position('left')
ax.spines['left'].set_position(('data', 0))

# Axis ranges.
plt.xlim(x.min()*1.1, x.max()*1.1)
plt.ylim(-1.5, 4.0)

# Tick positions and their labels.
plt.xticks([2, 4, 6, 8, 10], [r'2', r'4', r'6', r'8', r'10'])
plt.yticks([-1.0, 0.0, 1.0, 2.0, 3.0, 4.0],
           [r'-1.0', r'0.0', r'1.0', r'2.0', r'3.0', r'4.0'])

# Free-standing text, positioned in data coordinates.
plt.text(0.8, 0.8, r'$x \in [0.0, \ 10.0]$', color='k', fontsize=15)
plt.text(0.8, 0.9, r'$y \in [-1.0, \ 4.0]$', color='k', fontsize=15)

# Highlight one point with a larger marker and annotate it with an arrow.
plt.scatter([8,], [np.sqrt(8),], 50, color='m')
plt.annotate(r'$2\sqrt{2}$', xy=(8, np.sqrt(8)), xytext=(8.5, 2.2), fontsize=16, color='#090909',
             arrowprops=dict(arrowstyle='->', connectionstyle='arc3, rad=0.1', color='#090909'))

# Title and axis labels.
plt.title(r'$the \ function \ figure \ of \ cos(), \ sin() \ and \ sqrt()$', fontsize=19)
plt.xlabel(r'$the \ input \ value \ of \ x$', fontsize=18, labelpad=88.8)
plt.ylabel(r'$y = f(x)$', fontsize=18, labelpad=12.5)

# Legend: 'upper right' is the valid location name ('up right' is not one of the accepted values).
plt.legend(loc='upper right')

# Grid lines.
plt.grid(True)

# Save BEFORE showing: once plt.show() returns the figure may already have been
# released, in which case savefig would write an empty image.
# The target directory must already exist; dpi sets the output resolution.
plt.savefig('../figures/plot3d_ex.png', dpi=48)
plt.show()
10.plt一般是全局的做圖
11.常用圖形:曲線圖、灰階圖、散點圖、箱式圖、餅狀圖
12.可視化詳細的網站:http://matplotlib.org/api/pyplot_api.html
更詳細的網站:http://matplotlib.org/api/index.html
13.自行車租賃的案例
## Load the packages used to fetch and read the data
import pandas as pd  # read the data into a DataFrame
import urllib  # network access
import shutil  # file operations
import zipfile  # zip archives
import os  # files and directories

# urlretrieve lives in urllib.request on Python 3 and directly in urllib on Python 2.
try:
    from urllib.request import urlretrieve
except ImportError:
    from urllib import urlretrieve

## Create the working directory
# os.system() reports failure through its return code and never raises, so the
# original try/except fallback could not run; create the directory portably and
# reuse it when it is already there (which is what effectively happened before).
data_dir = 'bike_data'
if not os.path.isdir(data_dir):
    os.makedirs(data_dir)

data_source = 'http://archive.ics.uci.edu/ml/machine-learning-databases/00275/Bike-Sharing-Dataset.zip'  # remote data location
zipname = 'bike_data/Bike-Sharing-Dataset.zip'  # local path of the downloaded archive
urlretrieve(data_source, zipname)  # download the archive

# The context manager closes the archive even if extraction fails.
with zipfile.ZipFile(zipname, 'r') as zip_ref:
    zip_ref.extractall('bike_data')

daily_path = 'bike_data/day.csv'
daily_data = pd.read_csv(daily_path)  # read the csv file
daily_data['dteday'] = pd.to_datetime(daily_data['dteday'])  # parse the date strings into datetimes
drop_list = ['instant', 'season', 'yr', 'mnth', 'holiday', 'workingday', 'weathersit', 'atemp', 'hum']  # columns not needed below
daily_data.drop(drop_list, inplace = True, axis = 1)  # inplace=True modifies daily_data directly
daily_data.head()  # peek at the data (displayed when run as the last line of a notebook cell)
#####配置參數
from __future__ import division, print_function # 引入3.x版本的除法和列印
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
# 在notebook中顯示繪圖結果
%matplotlib inline
# 設定一些全局的資源參數,可以進行個性化修改
import matplotlib
# 設定圖檔尺寸 14" x 7"
# rc: resource configuration
matplotlib.rc('figure', figsize = (14, 7))
# 設定字型 14
matplotlib.rc('font', size = 14)
# 不顯示頂部和右側的坐标線
matplotlib.rc('axes.spines', top = False, right = False)
# 不顯示網格
matplotlib.rc('axes', grid = False)
# 設定背景顔色是白色
matplotlib.rc('axes', facecolor = 'white')
###### Scatter plot: inspect how two variables relate
from matplotlib import font_manager

# Font properties requesting the SimHei family; not referenced by the helper below.
fontP = font_manager.FontProperties()
fontP.set_size(1000)
fontP.set_family('SimHei')


# Scatter-plot helper, wrapped in a function for reuse
def scatterplot(x_data, y_data, x_label, y_label, title):
    """Draw ``y_data`` against ``x_data`` as a scatter plot on a new figure.

    ``x_label``, ``y_label`` and ``title`` are applied to the axes as the axis
    labels and the title. Nothing is returned.
    """
    _fig, axes = plt.subplots()
    # Marker size, colour and transparency (colour chart: http://www.114la.com/other/rgb.htm)
    axes.scatter(x_data, y_data, s=10, color='#539caf', alpha=0.75)
    axes.set_xlabel(x_label)
    axes.set_ylabel(y_label)
    axes.set_title(title)


# Draw the scatter plot
scatterplot(
    x_data=daily_data['temp'].values,
    y_data=daily_data['cnt'].values,
    x_label='temperature (C)',
    y_label='Check outs',
    title='Number of Check Outs vs Temperature',
)
#### Line chart
# Linear regression
import statsmodels.api as sm  # least squares
from statsmodels.stats.outliers_influence import summary_table  # summary information for a fit

y = daily_data['cnt']
x = sm.add_constant(daily_data['temp'])  # add the constant term, i.e. y = kx + b
regr = sm.OLS(y, x)  # ordinary least squares model
res = regr.fit()

# Pull the fitted data out of the model.
# alpha=0.05 is the significance level; st is the summary, data the detailed
# values and ss2 the column names of data.
st, data, ss2 = summary_table(res, alpha=0.05)
fitted_values = data[:, 2]  # column 2 is used as the fitted values (see the printed ss2)

print(data)
print(fitted_values)
print(ss2)
# Line-plot helper
def lineplot(x_data, y_data, x_label, y_label, title):
    """Draw ``y_data`` against ``x_data`` as a single line on a new figure.

    ``x_label``, ``y_label`` and ``title`` are applied to the axes as the axis
    labels and the title. Nothing is returned.
    """
    _fig, axes = plt.subplots()
    # lw is the line width; alpha=1 keeps the line fully opaque
    axes.plot(x_data, y_data, lw=2, color='#539caf', alpha=1)
    axes.set_xlabel(x_label)
    axes.set_ylabel(y_label)
    axes.set_title(title)


# Call the helper with the fitted values
lineplot(
    x_data=daily_data['temp'],
    y_data=fitted_values,
    x_label='Normalized temperature (C)',
    y_label='Check outs',
    title='Line of Best Fit for Number of Check Outs vs Temperature',
)
######## Line chart with a confidence interval
# Lower and upper bounds of the confidence interval (summary_table was called with alpha=0.05)
predict_mean_ci_low = data[:, 4]
predict_mean_ci_upp = data[:, 5]

# Collect x and both bounds in one DataFrame, then order the rows by x
CI_df = pd.DataFrame(
    {
        'x_data': daily_data['temp'],
        'low_CI': predict_mean_ci_low,
        'upper_CI': predict_mean_ci_upp,
    },
    columns=['x_data', 'low_CI', 'upper_CI'],
)
CI_df = CI_df.sort_values('x_data')
# Line plot with a shaded confidence band
def lineplotCI(x_data, y_data, sorted_x, low_CI, upper_CI, x_label, y_label, title):
    """Draw a fitted line plus a shaded band between ``low_CI`` and ``upper_CI``.

    ``x_data`` / ``y_data`` give the line; ``sorted_x`` with ``low_CI`` and
    ``upper_CI`` give the band. ``x_label``, ``y_label`` and ``title`` are
    applied to the axes. Nothing is returned.
    """
    band_colour = '#539caf'
    _fig, axes = plt.subplots()
    # Prediction line
    axes.plot(x_data, y_data, lw=1, color=band_colour, alpha=1, label='Fit')
    # Confidence band, filled along sorted_x
    axes.fill_between(sorted_x, low_CI, upper_CI, color=band_colour, alpha=0.4, label='95% CI')
    axes.set_xlabel(x_label)
    axes.set_ylabel(y_label)
    axes.set_title(title)
    # The legend uses the label arguments above; loc='best' lets matplotlib pick the position
    axes.legend(loc='best')


# Call the function to create the plot
lineplotCI(
    x_data=daily_data['temp'],
    y_data=fitted_values,
    sorted_x=CI_df['x_data'],
    low_CI=CI_df['low_CI'],
    upper_CI=CI_df['upper_CI'],
    x_label='Normalized temperature (C)',
    y_label='Check outs',
    title='Line of Best Fit for Number of Check Outs vs Temperature',
)
###### Line chart with two y axes
# Helper that plots two series against a shared x axis
def lineplot2y(x_data, x_label, y1_data, y1_color, y1_label, y2_data, y2_color, y2_label, title):
    """Draw two series over the same x values, each with its own y axis.

    ``y1_data`` is drawn on the left axis and ``y2_data`` on a twin axis on the
    right. Each y-axis label is coloured like its line (``y1_color`` /
    ``y2_color``). ``x_label`` and ``title`` are applied to the left axes.
    Nothing is returned.
    """
    _fig, left_axes = plt.subplots()
    left_axes.plot(x_data, y1_data, color=y1_color)
    left_axes.set_ylabel(y1_label, color=y1_color)
    left_axes.set_xlabel(x_label)
    left_axes.set_title(title)

    right_axes = left_axes.twinx()  # second axes sharing the x axis
    right_axes.plot(x_data, y2_data, color=y2_color)
    right_axes.set_ylabel(y2_label, color=y2_color)
    # Make the right-hand border visible for the second y axis
    right_axes.spines['right'].set_visible(True)


# Call the plotting helper
lineplot2y(
    x_data=daily_data['dteday'],
    x_label='Day',
    y1_data=daily_data['cnt'],
    y1_color='#539caf',
    y1_label='Check outs',
    y2_data=daily_data['windspeed'],
    y2_color='#7663b0',
    y2_label='Normalized windspeed',
    title='Check Outs and Windspeed Over Time',
)
# The text below was fused onto the end of the call above (a syntax error); kept here as comments.
# Source: https://blog.csdn.net/cxlhuihui/article/details/80006441
# 1. ECharts can produce more polished figures; matplotlib and seaborn cover simple data visualisation.
2.Series或DataFrame轉化成array可使用.values屬性,在可視化過程中盡量用np數組的形式
3.%matplotlib inline 圖是嵌入在notebook裡面的不是跳出來一個框的形式
4.matplotlib.pyplot是matplotlib中用於作圖的子模組
5.x = np.arange(0., 10, 0.2),在0到10之間(不含10)每隔0.2取一個數
6.numpy具有broadcast(廣播)的特性
7.plt.rcParams["figure.figsize"] = (12,8)設定圖的尺寸
8.plt.plot(x, y1, color='blue', linewidth=1.5, linestyle='-', marker='.', label=r'$y = cos{x}$')作圖的輪廓,其中label是取名作用,在這裡可以不指定,下面區分圖例使用
9.完整的繪圖
# Load packages.
import numpy as np
import matplotlib.pyplot as plt
from pylab import *  # NOTE: wildcard import kept only so code relying on pylab names keeps working

# Sample data: x runs from 0 up to (but not including) 10 in steps of 0.2.
x = np.arange(0., 10, 0.2)
y1 = np.cos(x)
y2 = np.sin(x)
y3 = np.sqrt(x)

# Draw the three curves; the labels are picked up by the legend below.
plt.plot(x, y1, color='blue', linewidth=1.5, linestyle='-', marker='.', label=r'$y = cos{x}$')
plt.plot(x, y2, color='green', linewidth=1.5, linestyle='-', marker='*', label=r'$y = sin{x}$')
plt.plot(x, y3, color='m', linewidth=1.5, linestyle='-', marker='x', label=r'$y = \sqrt{x}$')

# Work on the axes the curves were just drawn on (rather than asking for a subplot again).
ax = plt.gca()
ax.spines['right'].set_color('none')  # hide the right border
ax.spines['top'].set_color('none')  # hide the top border
# Move the bottom border to y = 0 so that it acts as the x axis.
ax.xaxis.set_ticks_position('bottom')
ax.spines['bottom'].set_position(('data', 0))
# Move the left border to x = 0 so that it acts as the y axis.
ax.yaxis.set_ticks_position('left')
ax.spines['left'].set_position(('data', 0))

# Axis ranges.
plt.xlim(x.min()*1.1, x.max()*1.1)
plt.ylim(-1.5, 4.0)

# Tick positions and their labels.
plt.xticks([2, 4, 6, 8, 10], [r'2', r'4', r'6', r'8', r'10'])
plt.yticks([-1.0, 0.0, 1.0, 2.0, 3.0, 4.0],
           [r'-1.0', r'0.0', r'1.0', r'2.0', r'3.0', r'4.0'])

# Free-standing text, positioned in data coordinates.
plt.text(0.8, 0.8, r'$x \in [0.0, \ 10.0]$', color='k', fontsize=15)
plt.text(0.8, 0.9, r'$y \in [-1.0, \ 4.0]$', color='k', fontsize=15)

# Highlight one point with a larger marker and annotate it with an arrow.
plt.scatter([8,], [np.sqrt(8),], 50, color='m')
plt.annotate(r'$2\sqrt{2}$', xy=(8, np.sqrt(8)), xytext=(8.5, 2.2), fontsize=16, color='#090909',
             arrowprops=dict(arrowstyle='->', connectionstyle='arc3, rad=0.1', color='#090909'))

# Title and axis labels.
plt.title(r'$the \ function \ figure \ of \ cos(), \ sin() \ and \ sqrt()$', fontsize=19)
plt.xlabel(r'$the \ input \ value \ of \ x$', fontsize=18, labelpad=88.8)
plt.ylabel(r'$y = f(x)$', fontsize=18, labelpad=12.5)

# Legend: 'upper right' is the valid location name ('up right' is not one of the accepted values).
plt.legend(loc='upper right')

# Grid lines.
plt.grid(True)

# Save BEFORE showing: once plt.show() returns the figure may already have been
# released, in which case savefig would write an empty image.
# The target directory must already exist; dpi sets the output resolution.
plt.savefig('../figures/plot3d_ex.png', dpi=48)
plt.show()
10.plt一般是全局的做圖
11.常用圖形:曲線圖、灰階圖、散點圖、箱式圖、餅狀圖
12.可視化詳細的網站:http://matplotlib.org/api/pyplot_api.html
更詳細的網站:http://matplotlib.org/api/index.html
13.自行車租賃的案例
## Load the packages used to fetch and read the data
import pandas as pd  # read the data into a DataFrame
import urllib  # network access
import shutil  # file operations
import zipfile  # zip archives
import os  # files and directories

# urlretrieve lives in urllib.request on Python 3 and directly in urllib on Python 2.
try:
    from urllib.request import urlretrieve
except ImportError:
    from urllib import urlretrieve

## Create the working directory
# os.system() reports failure through its return code and never raises, so the
# original try/except fallback could not run; create the directory portably and
# reuse it when it is already there (which is what effectively happened before).
data_dir = 'bike_data'
if not os.path.isdir(data_dir):
    os.makedirs(data_dir)

data_source = 'http://archive.ics.uci.edu/ml/machine-learning-databases/00275/Bike-Sharing-Dataset.zip'  # remote data location
zipname = 'bike_data/Bike-Sharing-Dataset.zip'  # local path of the downloaded archive
urlretrieve(data_source, zipname)  # download the archive

# The context manager closes the archive even if extraction fails.
with zipfile.ZipFile(zipname, 'r') as zip_ref:
    zip_ref.extractall('bike_data')

daily_path = 'bike_data/day.csv'
daily_data = pd.read_csv(daily_path)  # read the csv file
daily_data['dteday'] = pd.to_datetime(daily_data['dteday'])  # parse the date strings into datetimes
drop_list = ['instant', 'season', 'yr', 'mnth', 'holiday', 'workingday', 'weathersit', 'atemp', 'hum']  # columns not needed below
daily_data.drop(drop_list, inplace = True, axis = 1)  # inplace=True modifies daily_data directly
daily_data.head()  # peek at the data (displayed when run as the last line of a notebook cell)
#####配置參數
from __future__ import division, print_function # 引入3.x版本的除法和列印
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
# 在notebook中顯示繪圖結果
%matplotlib inline
# 設定一些全局的資源參數,可以進行個性化修改
import matplotlib
# 設定圖檔尺寸 14" x 7"
# rc: resource configuration
matplotlib.rc('figure', figsize = (14, 7))
# 設定字型 14
matplotlib.rc('font', size = 14)
# 不顯示頂部和右側的坐标線
matplotlib.rc('axes.spines', top = False, right = False)
# 不顯示網格
matplotlib.rc('axes', grid = False)
# 設定背景顔色是白色
matplotlib.rc('axes', facecolor = 'white')
###### Scatter plot: inspect how two variables relate
from matplotlib import font_manager

# Font properties requesting the SimHei family; not referenced by the helper below.
fontP = font_manager.FontProperties()
fontP.set_size(1000)
fontP.set_family('SimHei')


# Scatter-plot helper, wrapped in a function for reuse
def scatterplot(x_data, y_data, x_label, y_label, title):
    """Draw ``y_data`` against ``x_data`` as a scatter plot on a new figure.

    ``x_label``, ``y_label`` and ``title`` are applied to the axes as the axis
    labels and the title. Nothing is returned.
    """
    _fig, axes = plt.subplots()
    # Marker size, colour and transparency (colour chart: http://www.114la.com/other/rgb.htm)
    axes.scatter(x_data, y_data, s=10, color='#539caf', alpha=0.75)
    axes.set_xlabel(x_label)
    axes.set_ylabel(y_label)
    axes.set_title(title)


# Draw the scatter plot
scatterplot(
    x_data=daily_data['temp'].values,
    y_data=daily_data['cnt'].values,
    x_label='temperature (C)',
    y_label='Check outs',
    title='Number of Check Outs vs Temperature',
)
#### Line chart
# Linear regression
import statsmodels.api as sm  # least squares
from statsmodels.stats.outliers_influence import summary_table  # summary information for a fit

y = daily_data['cnt']
x = sm.add_constant(daily_data['temp'])  # add the constant term, i.e. y = kx + b
regr = sm.OLS(y, x)  # ordinary least squares model
res = regr.fit()

# Pull the fitted data out of the model.
# alpha=0.05 is the significance level; st is the summary, data the detailed
# values and ss2 the column names of data.
st, data, ss2 = summary_table(res, alpha=0.05)
fitted_values = data[:, 2]  # column 2 is used as the fitted values (see the printed ss2)

print(data)
print(fitted_values)
print(ss2)
# Line-plot helper
def lineplot(x_data, y_data, x_label, y_label, title):
    """Draw ``y_data`` against ``x_data`` as a single line on a new figure.

    ``x_label``, ``y_label`` and ``title`` are applied to the axes as the axis
    labels and the title. Nothing is returned.
    """
    _fig, axes = plt.subplots()
    # lw is the line width; alpha=1 keeps the line fully opaque
    axes.plot(x_data, y_data, lw=2, color='#539caf', alpha=1)
    axes.set_xlabel(x_label)
    axes.set_ylabel(y_label)
    axes.set_title(title)


# Call the helper with the fitted values
lineplot(
    x_data=daily_data['temp'],
    y_data=fitted_values,
    x_label='Normalized temperature (C)',
    y_label='Check outs',
    title='Line of Best Fit for Number of Check Outs vs Temperature',
)
######## Line chart with a confidence interval
# Lower and upper bounds of the confidence interval (summary_table was called with alpha=0.05)
predict_mean_ci_low = data[:, 4]
predict_mean_ci_upp = data[:, 5]

# Collect x and both bounds in one DataFrame, then order the rows by x
CI_df = pd.DataFrame(
    {
        'x_data': daily_data['temp'],
        'low_CI': predict_mean_ci_low,
        'upper_CI': predict_mean_ci_upp,
    },
    columns=['x_data', 'low_CI', 'upper_CI'],
)
CI_df = CI_df.sort_values('x_data')
# Line plot with a shaded confidence band
def lineplotCI(x_data, y_data, sorted_x, low_CI, upper_CI, x_label, y_label, title):
    """Draw a fitted line plus a shaded band between ``low_CI`` and ``upper_CI``.

    ``x_data`` / ``y_data`` give the line; ``sorted_x`` with ``low_CI`` and
    ``upper_CI`` give the band. ``x_label``, ``y_label`` and ``title`` are
    applied to the axes. Nothing is returned.
    """
    band_colour = '#539caf'
    _fig, axes = plt.subplots()
    # Prediction line
    axes.plot(x_data, y_data, lw=1, color=band_colour, alpha=1, label='Fit')
    # Confidence band, filled along sorted_x
    axes.fill_between(sorted_x, low_CI, upper_CI, color=band_colour, alpha=0.4, label='95% CI')
    axes.set_xlabel(x_label)
    axes.set_ylabel(y_label)
    axes.set_title(title)
    # The legend uses the label arguments above; loc='best' lets matplotlib pick the position
    axes.legend(loc='best')


# Call the function to create the plot
lineplotCI(
    x_data=daily_data['temp'],
    y_data=fitted_values,
    sorted_x=CI_df['x_data'],
    low_CI=CI_df['low_CI'],
    upper_CI=CI_df['upper_CI'],
    x_label='Normalized temperature (C)',
    y_label='Check outs',
    title='Line of Best Fit for Number of Check Outs vs Temperature',
)
###### Line chart with two y axes
# Helper that plots two series against a shared x axis
def lineplot2y(x_data, x_label, y1_data, y1_color, y1_label, y2_data, y2_color, y2_label, title):
    """Draw two series over the same x values, each with its own y axis.

    ``y1_data`` is drawn on the left axis and ``y2_data`` on a twin axis on the
    right. Each y-axis label is coloured like its line (``y1_color`` /
    ``y2_color``). ``x_label`` and ``title`` are applied to the left axes.
    Nothing is returned.
    """
    _fig, left_axes = plt.subplots()
    left_axes.plot(x_data, y1_data, color=y1_color)
    left_axes.set_ylabel(y1_label, color=y1_color)
    left_axes.set_xlabel(x_label)
    left_axes.set_title(title)

    right_axes = left_axes.twinx()  # second axes sharing the x axis
    right_axes.plot(x_data, y2_data, color=y2_color)
    right_axes.set_ylabel(y2_label, color=y2_color)
    # Make the right-hand border visible for the second y axis
    right_axes.spines['right'].set_visible(True)


# Call the plotting helper
lineplot2y(
    x_data=daily_data['dteday'],
    x_label='Day',
    y1_data=daily_data['cnt'],
    y1_color='#539caf',
    y1_label='Check outs',
    y2_data=daily_data['windspeed'],
    y2_color='#7663b0',
    y2_label='Normalized windspeed',
    title='Check Outs and Windspeed Over Time',
)