天天看點

人工智能學習筆記——可視化庫matplotlib

源資料(前12行)

人工智能學習筆記——可視化庫matplotlib

資料預處理(DATE格式轉換):

import pandas as pd

# Read UNRATE.csv, then replace the textual DATE column with parsed
# datetime values so it can serve as a time axis in the plots.
unrate = pd.read_csv("UNRATE.csv")
unrate = unrate.assign(DATE=pd.to_datetime(unrate["DATE"]))

# Preview the first twelve rows.
print(unrate[:12])

DATE  VALUE
0  1948-01-01    3.4
1  1948-02-01    3.8
2  1948-03-01    4.0
3  1948-04-01    3.9
4  1948-05-01    3.5
5  1948-06-01    3.6
6  1948-07-01    3.6
7  1948-08-01    3.9
8  1948-09-01    3.8
9  1948-10-01    3.7
10 1948-11-01    3.8
11 1948-12-01    4.0      

繪制折線圖:

import matplotlib.pyplot as plt

# Calling plot() with no data still creates a figure with empty axes.
plt.plot()

# Render the current figure.
plt.show()

人工智能學習筆記——可視化庫matplotlib

# Plot the first twelve rows as a line chart (DATE on x, VALUE on y).
# Only a slice is taken here; `unrate` itself is deliberately NOT reassigned,
# because later snippets slice further rows from it (e.g. unrate[12:24]).
first_twelve = unrate[0:12]

plt.plot(first_twelve["DATE"], first_twelve["VALUE"])
plt.show()

人工智能學習筆記——可視化庫matplotlib

x坐标标注旋轉

# Rotate the x-axis tick labels by 45 degrees (call before plt.show()).
plt.xticks(rotation = 45)

人工智能學習筆記——可視化庫matplotlib

坐标軸與标題标注

# Label the x axis.
plt.xlabel("Month") 

# Label the y axis.
plt.ylabel("Unemployment Rate")

# Set the title shown above the plot.
plt.title("Monthly Unemployment Trend,1948")

人工智能學習筆記——可視化庫matplotlib

子圖操作:

人工智能學習筆記——可視化庫matplotlib

# Create the drawing area (figure).
fig = plt.figure()

# Place three axes on a 4-row x 3-column grid, in slots 1, 2 and 6
# (slots are numbered row by row starting at 1).
ax1, ax2, ax3 = (fig.add_subplot(4, 3, slot) for slot in (1, 2, 6))

人工智能學習筆記——可視化庫matplotlib

# numpy is needed for arange/random below; it is not imported anywhere else
# in this file, so it is imported here.
import numpy as np

# Create a 6x6 inch figure with two axes stacked vertically.
fig = plt.figure(figsize=(6, 6))
ax1 = fig.add_subplot(2, 1, 1)
ax2 = fig.add_subplot(2, 1, 2)

# Top: five random integers drawn from [1, 5) against x = 0..4.
ax1.plot(np.arange(5), np.random.randint(1, 5, 5))

# Bottom: the straight line y = 3x for x = 0..9.
ax2.plot(np.arange(10), np.arange(10) * 3)

plt.show()

人工智能學習筆記——可視化庫matplotlib

同一坐标系下繪制多條線:

# Draw two consecutive 12-row slices of `unrate` on the same axes.
fig = plt.figure(figsize=(6, 3))

# The table only has DATE and VALUE columns (there is no 'MONTH' column),
# so the month number used for the x axis is derived from DATE.
# This relies on DATE having been converted with pd.to_datetime.
first_year = unrate[0:12]
second_year = unrate[12:24]

plt.plot(first_year['DATE'].dt.month, first_year['VALUE'], c='red')
plt.plot(second_year['DATE'].dt.month, second_year['VALUE'], c='blue')

plt.show()

人工智能學習筆記——可視化庫matplotlib

# Draw five consecutive 12-row slices of `unrate`, one colour per slice.
fig = plt.figure(figsize=(10, 6))

colors = ['red', 'blue', 'green', 'orange', 'black']

for i, color in enumerate(colors):
    # Rows [12*i, 12*(i+1)) form the i-th slice.
    subset = unrate[i * 12:(i + 1) * 12]
    # There is no 'MONTH' column in the table; derive the month number from
    # DATE (requires DATE to have been converted with pd.to_datetime).
    plt.plot(subset['DATE'].dt.month, subset['VALUE'], c=color)

plt.show()

人工智能學習筆記——可視化庫matplotlib

曲線标簽:

# Draw five consecutive 12-row slices of `unrate` and label each curve so
# that a legend can be shown.
fig = plt.figure(figsize=(5, 3))

colors = ['red', 'blue', 'green', 'orange', 'black']

for i, color in enumerate(colors):
    # Rows [12*i, 12*(i+1)) form the i-th slice.
    subset = unrate[i * 12:(i + 1) * 12]
    # Legend text: 1948, 1949, ... one per slice.
    label = str(1948 + i)
    # There is no 'MONTH' column in the table; derive the month number from
    # DATE (requires DATE to have been converted with pd.to_datetime).
    plt.plot(subset['DATE'].dt.month, subset['VALUE'], c=color, label=label)

# Legend placement; 'best' lets matplotlib choose the location.
# Run help(plt.legend) to see every accepted `loc` value.
plt.legend(loc='best')

plt.show()

loc=  best
	upper right
	upper left
	lower left
	lower right
	right
	center left
	center right
	lower center
	upper center
	center
      

完整折線圖:

# Complete line chart: five labelled curves plus legend, axis labels and title.
fig = plt.figure(figsize=(5, 3))

colors = ['red', 'blue', 'green', 'orange', 'black']

for i, color in enumerate(colors):
    # Rows [12*i, 12*(i+1)) form the i-th slice.
    subset = unrate[i * 12:(i + 1) * 12]
    # Legend text: 1948, 1949, ... one per slice.
    label = str(1948 + i)
    # There is no 'MONTH' column in the table; derive the month number from
    # DATE (requires DATE to have been converted with pd.to_datetime).
    plt.plot(subset['DATE'].dt.month, subset['VALUE'], c=color, label=label)

plt.legend(loc='upper left')
plt.xlabel('Month, Integer')
plt.ylabel('Unemployment Rate, Percent')
plt.title('Monthly Unemployment Trends, 1948-1952')

plt.show()

繪制條形圖:

import pandas as pd

# Columns kept for the bar-chart examples: the film title plus five
# rating columns.
cols = [
    'FILM',
    'RT_user_norm',
    'Metacritic_user_nom',
    'IMDB_norm',
    'Fandango_Ratingvalue',
    'Fandango_Stars',
]

# Read the full table, then keep only the selected columns.
reviews = pd.read_csv('fandango_scores.csv')
norm_reviews = reviews[cols]

# Show the first row.
print(norm_reviews.head(1))

FILM  RT_user_norm  Metacritic_user_nom  \
0  Avengers: Age of Ultron (2015)           4.3                 3.55   

   IMDB_norm  Fandango_Ratingvalue  Fandango_Stars  
0        3.9                   4.5             5.0        

import matplotlib.pyplot as plt

from numpy import arange

# Axes.bar() needs the x coordinate of every bar and the height of every bar.
# With align='edge' each x coordinate is the LEFT edge of its bar, so a bar
# that starts at 0.75 with width 0.5 is centred on the integer 1.
# (The default, align='center', would centre the bars on 0.75, 1.75, ...)

num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']

# Ratings of the first film (row label 0). `.loc` replaces the removed `.ix`.
bar_heights = norm_reviews.loc[0, num_cols].values
print(bar_heights)

# Left edges of the five bars.
bar_positions = arange(5) + 0.75
print(bar_positions)

fig, ax = plt.subplots()

# Third positional argument is the bar width.
ax.bar(bar_positions, bar_heights, 0.5, align='edge')

plt.show()

[4.3 3.55 3.9 4.5 5.0]
[0.75 1.75 2.75 3.75 4.75]      
人工智能學習筆記——可視化庫matplotlib

橫向條形圖:

import matplotlib.pyplot as plt

from numpy import arange

# Horizontal bar chart of the first film's ratings, one bar per rating source.
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']

# Ratings of the first film (row label 0). `.loc` replaces the removed `.ix`.
bar_widths = norm_reviews.loc[0, num_cols].values

# Lower edges of the bars; with height 0.5 each bar is centred on 1, 2, ... 5,
# which is exactly where the ticks are placed below.
bar_positions = arange(5) + 0.75
tick_positions = range(1, 6)

fig, ax = plt.subplots()

# Third positional argument is the bar height (thickness). align='edge' makes
# bar_positions the lower edge of each bar rather than its centre, so the
# ticks at 1..5 line up with the middle of the bars.
ax.barh(bar_positions, bar_widths, 0.5, align='edge')

# Put one tick per bar and label it with the column name.
ax.set_yticks(tick_positions)
ax.set_yticklabels(num_cols)

ax.set_ylabel('Rating Source')
ax.set_xlabel('Average Rating')
ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')

plt.show()

人工智能學習筆記——可視化庫matplotlib

散點圖:

# Scatter plot of one rating column against another.
fig, ax = plt.subplots()

fandango_scores = norm_reviews['Fandango_Ratingvalue']
rt_scores = norm_reviews['RT_user_norm']

# x: Fandango rating, y: Rotten Tomatoes user rating.
ax.scatter(fandango_scores, rt_scores)
ax.set_xlabel('Fandango')
ax.set_ylabel('Rotten Tomatoes')

plt.show()

人工智能學習筆記——可視化庫matplotlib

直方圖(histogram):

import pandas as pd

import matplotlib.pyplot as plt

# Columns kept for the distribution examples: the film title plus four
# rating columns.
cols = [
    'FILM',
    'RT_user_norm',
    'Metacritic_user_nom',
    'IMDB_norm',
    'Fandango_Ratingvalue',
]

# Read the full table, then keep only the selected columns.
reviews = pd.read_csv('fandango_scores.csv')
norm_reviews = reviews[cols]

# Show the first five rows.
print(norm_reviews.head(5))

FILM  RT_user_norm  Metacritic_user_nom  \
0  Avengers: Age of Ultron (2015)           4.3                 3.55   
1               Cinderella (2015)           4.0                 3.75   
2                  Ant-Man (2015)           4.5                 4.05   
3          Do You Believe? (2015)           4.2                 2.35   
4   Hot Tub Time Machine 2 (2015)           1.4                 1.70   

   IMDB_norm  Fandango_Ratingvalue  
0       3.90                   4.5  
1       3.55                   4.5  
2       3.90                   4.5  
3       2.70                   4.5  
4       2.55                   3.0        

# Count how often each distinct rating occurs, then order the counts by
# rating value (the index) instead of by frequency.
fandango_distribution = norm_reviews['Fandango_Ratingvalue'].value_counts().sort_index()
imdb_distribution = norm_reviews['IMDB_norm'].value_counts().sort_index()

print(fandango_distribution)
print(imdb_distribution)

2.7     2
2.8     2
2.9     5
3.0     4
3.1     3
3.2     5
3.3     4
3.4     9
3.5     9
3.6     8
3.7     9
3.8     5
3.9    12
4.0     7
4.1    16
4.2    12
4.3    11
4.4     7
4.5     9
4.6     4
4.8     3
Name: Fandango_Ratingvalue, dtype: int64
2.00     1
2.10     1
2.15     1
2.20     1
2.30     2
2.45     2
2.50     1
2.55     1
2.60     2
2.70     4
2.75     5
2.80     2
2.85     1
2.90     1
2.95     3
3.00     2
3.05     4
3.10     1
3.15     9
3.20     6
3.25     4
3.30     9
3.35     7
3.40     1
3.45     7
3.50     4
3.55     7
3.60    10
3.65     5
3.70     8
3.75     6
3.80     3
3.85     4
3.90     9
3.95     2
4.00     1
4.05     1
4.10     4
4.15     1
4.20     2
4.30     1
Name: IMDB_norm, dtype: int64      

fig, ax = plt.subplots()

fandango_scores = norm_reviews['Fandango_Ratingvalue']

# Alternatives shown in the tutorial:
#   ax.hist(fandango_scores)            -> histogram with the default bins
#   ax.hist(fandango_scores, bins=20)   -> histogram with 20 bins
# Used here: 20 bins restricted to the value range 4..5.
ax.hist(fandango_scores, range=(4, 5), bins=20)

plt.show()

人工智能學習筆記——可視化庫matplotlib

# Tall figure holding four axes stacked in a single column.
fig = plt.figure(figsize=(5, 20))
ax1, ax2, ax3, ax4 = (fig.add_subplot(4, 1, row) for row in range(1, 5))

# Only the first axes is filled in here; the other three stay empty.
fandango_scores = norm_reviews['Fandango_Ratingvalue']
ax1.hist(fandango_scores, bins=20, range=(0, 5))
ax1.set_title('Distribution of Fandango Ratings')
# Fix the y-axis range to 0..50.
ax1.set_ylim(0, 50)

plt.show()

人工智能學習筆記——可視化庫matplotlib

箱型圖:

# Box plot of a single rating column.
fig, ax = plt.subplots()

rt_scores = norm_reviews['RT_user_norm'].values
ax.boxplot(rt_scores)

# One box, hence one tick label; y axis fixed to 0..5.
ax.set_xticklabels(['Rotten Tomatoes'])
ax.set_ylim(0, 5)

plt.show()

人工智能學習筆記——可視化庫matplotlib

# Box plots for four rating columns side by side.
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue']

fig, ax = plt.subplots()

# A 2-D array yields one box per column.
rating_matrix = norm_reviews[num_cols].values
ax.boxplot(rating_matrix)

# Label each box with its column name, written vertically; y axis fixed to 0..5.
ax.set_xticklabels(num_cols, rotation=90)
ax.set_ylim(0, 5)

plt.show()

人工智能學習筆記——可視化庫matplotlib

去除坐标軸刻度線(tick marks):

# Create a single figure/axes pair (creating it twice would leave a stray
# empty figure that plt.show() also displays).
fig, ax = plt.subplots()

# Hide the tick marks on all four sides. The arguments must be booleans:
# the string "off" is truthy and is no longer interpreted as False.
ax.tick_params(bottom=False, top=False, left=False, right=False)

plt.show()

人工智能學習筆記——可視化庫matplotlib

去邊框:

# Create a single figure/axes pair (creating it twice would leave a stray
# empty figure that plt.show() also displays).
fig, ax = plt.subplots()

# Hide every border line (spine) of the axes; the spine names are not needed.
for spine in ax.spines.values():
    spine.set_visible(False)

plt.show()

人工智能學習筆記——可視化庫matplotlib

RGB顔色通道:

# RGB colour as a tuple of floats in 0..1, scaled from the 0..255 channel
# values (0, 107, 164).
cb_dark_blue = tuple(channel / 255 for channel in (0, 107, 164))

線寬:

# Plot the column selected by major_cats[sp] against 'Year' as a very thick
# line (linewidth=10) in the custom colour, labelled 'Women'.
# NOTE(review): ax, women_degrees, major_cats and sp are not defined in this
# file; they must come from the surrounding tutorial code.
ax.plot(
    women_degrees['Year'],
    women_degrees[major_cats[sp]],
    c=cb_dark_blue,
    label='Women',
    linewidth=10,
)

曲線标注:

# Annotate the curves directly: write the text at the given (x, y) position.
# NOTE(review): `ax` is not defined in this snippet; it presumably is the axes
# holding the curves being annotated. Confirm against the full example.
ax.text(2005, 87, 'Men')

ax.text(2002, 8, 'Women')

人工智能學習筆記——可視化庫matplotlib

繼續閱讀