NumPy 特殊數組與通用函數

# 來源：NumPy Cookbook 2e ch6

建立通用函數

from __future__ import print_function 
import numpy as np

# 我們需要定義對單個元素操作的函數
def double(a):
    """Return ``a`` multiplied by two.

    Works on a single element; wrap it with ``np.frompyfunc`` (or
    ``np.vectorize``) to apply it to every element of an array.
    """
    doubled = 2 * a
    return doubled

# frompyfunc (or vectorize) turns the scalar function into one that is
# applied to every element of an array; the arguments 1, 1 are the number
# of inputs and the number of outputs.
ufunc = np.frompyfunc(double, 1, 1) 
print("Result", ufunc(np.arange(4)))
# Result [0 2 4 6]

勾股數

from __future__ import print_function 
import numpy as np

# A Pythagorean triple is three numbers with a ** 2 + b ** 2 == c ** 2.
# Parametrise them as a = m ** 2 - n ** 2, b = 2 * m * n and
# c = m ** 2 + n ** 2, then look for the triple with a + b + c == 1000.

# Both m and n run over 0..32.
m = np.arange(33)
n = np.arange(33)

# Build the 33x33 tables of a, b and c by broadcasting a column of
# m-values against a row of n-values: entry [i, j] combines m[i] with n[j].
a = (m ** 2)[:, np.newaxis] - (n ** 2)[np.newaxis, :]
b = 2 * (m[:, np.newaxis] * n[np.newaxis, :])
c = (m ** 2)[:, np.newaxis] + (n ** 2)[np.newaxis, :]

# Positional indices (rows, columns) of the entries whose sum is 1000.
idx = np.nonzero(a + b + c == 1000)

# Check the Pythagorean identity on the hits, then print them.
np.testing.assert_equal(a[idx] ** 2 + b[idx] ** 2, c[idx] ** 2)
print(a[idx], b[idx], c[idx])
# [375] [200] [425]

CharArray 字元串操作

# chararray 數組的元素隻能是字元串
# 并且擁有許多字元串專用的方法
# 雖然我們可以為字元串建立通用函數
# 但是直接使用這些方法更省事

import urllib2 
import numpy as np 
import re

# Download the web page with the urllib2 library
# (the requests library is the more recommended choice).
# NOTE(review): urllib2 is a Python 2 module; on Python 3 its functionality
# lives in urllib.request — confirm which interpreter this snippet targets.
response = urllib2.urlopen('http://python.org/') 
html = response.read() 

# Strip out every tag
html = re.sub(r'<.*?>', '', html) 

# Wrap the HTML text in an array that holds just that one string
# and view it as a chararray
carray = np.array(html).view(np.chararray) 

# expandtabs(1) expands TAB characters using a tab size of 1
carray = carray.expandtabs(1) 
# splitlines splits each element on line breaks
carray = carray.splitlines() 
print(carray)

建立屏蔽數組

from __future__ import print_function 
import numpy as np
from scipy.misc import lena
import matplotlib.pyplot as plt

# Load the Lena image (this rebinds the name `lena` from the loader
# function to the image array it returns)
lena = lena() 

# The mask has the same shape as the image; its elements are random 0s and 1s
random_mask = np.random.randint(0, 2, size=lena.shape)

# Draw the original image
plt.subplot(221) 
plt.title("Original") 
plt.imshow(lena) 
plt.axis('off')

# np.ma.array builds the masked array.
# An element is masked where random_mask is nonzero (1)
# and stays visible where random_mask is 0.
# Reading a masked element yields `masked`,
# but converting back to a plain np.array recovers the underlying data.
masked_array = np.ma.array(lena, mask=random_mask)
print(masked_array) 

# Draw the masked image
plt.subplot(222) 
plt.title("Masked") 
plt.imshow(masked_array) 
plt.axis('off')

忽略負數以及極值

from __future__ import print_function 
import numpy as np 
from matplotlib.finance import quotes_historical_yahoo
from datetime import date 
import matplotlib.pyplot as plt

def get_close(ticker):
    """Return the past year's closing prices for ``ticker`` as a NumPy array.

    The quotes come from ``quotes_historical_yahoo``; the window runs from
    the same calendar day one year ago up to today.

    NOTE(review): field 4 of each quote is assumed to be the close price —
    confirm against the tuple layout ``quotes_historical_yahoo`` returns.
    """
    end = date.today()
    # The start date is handed over as a plain (year, month, day) tuple.
    begin = (end.year - 1, end.month, end.day)
    history = quotes_historical_yahoo(ticker, begin, end)
    closes = [row[4] for row in history]
    return np.array(closes)

# Fetch one year of AAPL closing prices
close = get_close('AAPL')

# Every third index: 0, 3, 6, ...
triples = np.arange(0, len(close), 3) 
print("Triples", triples[:10], "...")
# Triples [ 0  3  6  9 12 15 18 21 24 27] ... 

# Create an all-ones array of the same length
signs = np.ones(len(close)) 
print("Signs", signs[:10], "...")
# Signs [ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.] ... 

# Set every third element of signs to -1
signs[triples] = -1 
print("Signs", signs[:10], "...")
# Signs [-1.  1.  1. -1.  1.  1. -1.  1.  1. -1.] ...

# What ma.log does:
# an element less than or equal to 0 is masked,
# an element greater than 0 is replaced by its logarithm
ma_log = np.ma.log(close * signs) 
print("Masked logs", ma_log[:10], "...")
# Masked logs [-- 5.93655586575 5.95094223368 -- 5.97468290742 5.97510711452 -- 6.01674381162 5.97889061623 --] ...

dev = close.std() 
avg = close.mean() 
# Mask the elements that fall outside avg - dev .. avg + dev
inside = np.ma.masked_outside(close, avg - dev, avg + dev) 
print("Inside", inside[:10], "...")
# Inside [-- -- -- -- -- -- 409.429675172    410.240597855 -- --] ...

# Plot the original data
plt.subplot(311) 
plt.title("Original") 
plt.plot(close)

# Plot the log-masked data (exp undoes the log on the unmasked elements)
plt.subplot(312) 
plt.title("Log Masked") 
plt.plot(np.exp(ma_log))

# Plot the range-masked data
plt.subplot(313) 
plt.title("Not Extreme") 
plt.plot(inside)

plt.tight_layout() 
plt.show()

記錄數組

# rec.array 是 array 的子類
# 可以通過元素的屬性來通路元素
from __future__ import print_function 
import numpy as np
from matplotlib.finance import quotes_historical_yahoo
from datetime import date

tickers = ['MRK', 'T', 'VZ']

def get_close(ticker):
    """Return the past year's closing prices for ``ticker`` as a NumPy array.

    The quotes come from ``quotes_historical_yahoo``; the window runs from
    the same calendar day one year ago up to today.

    NOTE(review): field 4 of each quote is assumed to be the close price —
    confirm against the tuple layout ``quotes_historical_yahoo`` returns.
    """
    end = date.today()
    # The start date is handed over as a plain (year, month, day) tuple.
    begin = (end.year - 1, end.month, end.day)
    history = quotes_historical_yahoo(ticker, begin, end)
    closes = [row[4] for row in history]
    return np.array(closes)

# Build a record array that tracks, for every stock, its symbol,
# standard score (the reciprocal of the standard deviation), mean and score.
weights = np.recarray((len(tickers),), dtype=[('symbol', np.str_, 16),
    ('stdscore', float), ('mean', float), ('score', float)])

for i, ticker in enumerate(tickers):
    # Fetch the closing prices and compute the log returns
    close = get_close(ticker)
    logrets = np.diff(np.log(close))
    # Store the symbol, the mean log return and the standard score
    weights[i]['symbol'] = ticker
    weights[i]['mean'] = logrets.mean()
    weights[i]['stdscore'] = 1/logrets.std()
    weights[i]['score'] = 0

# Normalise the mean and stdscore columns so that each one sums to 1
for key in ['mean', 'stdscore']:
    wsum = weights[key].sum()
    weights[key] = weights[key]/wsum

# The score is the average of the standard score and the mean
weights['score'] = (weights['stdscore'] + weights['mean'])/2
# Sort whole records by score. Sorting only the 'score' field in place
# (weights['score'].sort()) reorders that single column and leaves every
# score attached to the wrong symbol.
weights.sort(order='score')

# Print the details of every stock
for record in weights:
    print("%s,mean=%.4f,stdscore=%.4f,score=%.4f" % (record['symbol'], record['mean'], record['stdscore'], record['score']))
# Sample output from one historical run (live data will differ);
# each score is the average of that row's mean and stdscore:
# T,mean=0.0927,stdscore=0.3427,score=0.2177
# VZ,mean=0.0888,stdscore=0.3636,score=0.2262
# MRK,mean=0.8185,stdscore=0.2938,score=0.5561

NumPy 特殊數組與通用函數

NumPy 特殊數組與通用函數

建立通用函數

勾股數

CharArray 字元串操作

建立屏蔽數組

忽略負數以及極值

記錄數組

繼續閱讀

YAML簡介和PyYAML安全操作YAML支援的類型YAML的優點：yaml的基本文法python操作

2021-2025年中國運動療法（KT）帶行業市場供需與戰略研究報告

Small tricks

libsvm for python 安裝

2021年危險化學品經營機關安全管理人員考試題庫及危險化學品經營機關安全管理人員考試技巧

學習軟體測試基礎測試第七天

Zeppelin 配置通路 REST APIApache Zeppelin Configuration REST API

【Torch】最簡潔logging使用指南

27. Remove Element(清單)題目代碼

無人機--飛控科普

Cloud Studio初體驗

使用 ctypes 進行 Python 和 C 的混合程式設計

【python】【資料處理】畫多元資料分布圖

【python】netconf協定對接管理裝置

「Python 網絡自動化」NETCONF —— Python 使用 NETCONF 管理配置 H3C 網絡裝置

在python中建立excel并寫入