天天看點

python制作熱力圖_Python 繪制指數相關性熱力圖

資料擷取

全部指數資料從中證指數公司擷取

加載資料

使用pandas 解析所有excel, 儲存每個指數對應的成分股的權重,資料中的一些指數隻包含前十大權重股的權重,對這些指數中其他成分股的權重按照平均配置設定的方式設定。

load_xls.py

#-*- coding: utf-8 -*-

"""

File Name: load_xls.py

Author: ce39906

mail: [email protected]

Created Time: 2019-03-01 11:35:07

"""

import pandas as pd

import numpy as np

def load_close_weight_xls(filename):

print("loading ", filename)

df = pd.read_excel(filename,

converters={'指數代碼Index Code' : lambda x: str(x)})

index_name = df['指數名稱Index Name'].values[0]

index_code = df['指數代碼Index Code'].values[0]

constituent_code = df['成分券代碼Constituent Code'].values

weight = df['權重(%)Weight(%)'].values

n = weight.shape[0]

constituent_code_2_weight = {}

for i in range(n):

constituent_code_2_weight[constituent_code[i]] = weight[i]

return index_name, index_code, constituent_code_2_weight

def load_cons_xls(filename):

print("loading ", filename)

sheet1_df = pd.read_excel(filename,

converters={'指數代碼Index Code' : lambda x: str(x)})

index_name = sheet1_df['指數名稱Index Name'].values[0]

index_code = sheet1_df['指數代碼Index Code'].values[0]

constituent_code = sheet1_df['成分券代碼Constituent Code'].values

sheet2_df = pd.read_excel(filename, sheet_name='weight')

top10_code_2_weight = {}

for index, row in sheet2_df.iterrows():

code = row['代碼']

weight = row['權重']

top10_code_2_weight[code] = weight

total_count = constituent_code.shape[0]

top10_weight_count = len(top10_code_2_weight.keys())

top10_weight_sum = 0.0

for top10_weight in top10_code_2_weight.values():

top10_weight_sum += top10_weight

default_weight = (100.0 - top10_weight_sum) / (total_count - top10_weight_count)

constituent_code_2_weight = {}

for i in range(total_count):

code = constituent_code[i]

if code in top10_code_2_weight:

constituent_code_2_weight[code] = top10_code_2_weight[code]

else:

constituent_code_2_weight[code] = default_weight

return index_name, index_code, constituent_code_2_weigh

向量化以及繪制熱力圖

向量化的方式是統計出所有指數全部成分股作為向量的全部次元。

針對每個指數對應的向量,首先将向量值全部設定為0,然後在向量上為成分股對應的次元指派。

将每個指數都向量化後,合并到一個pandas dataframe中,dataframe中的每一列代表一個指數對應的向量。擷取dataframe後使用corr()方法獲得協方差矩陣,然後使用seaborn 繪制。

cal_stock_indices_correlation.py

#-*- coding: utf-8 -*-

"""

File Name: cal_stock_indices_correlation.py

Author: ce39906

mail: [email protected]

Created Time: 2019-03-01 14:31:30

"""

import load_xls

import os

import pandas as pd

import numpy as np

import seaborn as sns

import matplotlib.pyplot as plt

plt.rcParams['font.sans-serif'] = ['SimHei']

plt.rcParams['axes.unicode_minus'] = False

def load_data():

stock_indices_data = {}

constituent_codes = set()

for filename in os.listdir('data'):

if not filename.endswith('.xls'):

continue

if len(filename) == 21:

index_name, index_code, constituent_code_2_weight = load_xls.load_close_weight_xls('data/' + filename)

else:

index_name, index_code, constituent_code_2_weight = load_xls.load_cons_xls('data/' + filename)

for constituent_code in constituent_code_2_weight.keys():

constituent_codes.add(constituent_code)

stock_indices_data[index_name + '_' + str(index_code)] = constituent_code_2_weight

return list(constituent_codes), stock_indices_data

def vectorization(constituent_codes, stock_indices_data):

n = len(constituent_codes)

df = pd.DataFrame()

for stock_index_name, weights in stock_indices_data.items():

vector = [0.0] * n

for i in range(n):

code = constituent_codes[i]

if code in weights:

vector[i] = weights[code] / 100.0

df[stock_index_name] = vector

return df

def plot_corr_heatmap(df):

corr = df.corr()

f, ax = plt.subplots(figsize=(15, 15))

sns.heatmap(corr,

cmap='rainbow',

linewidths=0.05,

ax=ax,

square=True,

annot=True)

f.savefig('stock_indices_corr_heatmap.png')

def main():

constituent_codes, stock_indices_data = load_data()

df = vectorization(constituent_codes, stock_indices_data)

plot_corr_heatmap(df)

if __name__ == '__main__':

main()

熱力圖

python制作熱力圖_Python 繪制指數相關性熱力圖