之前展示過只使用python、numpy建構簡單的神經網絡:前向傳播、反向傳播、鏈式法則求導等,有助於了解相關知識,但工程中是不可能這樣做的。
一些基本的概念
1、常量 tf.constant
```python
import tensorflow as tf
# Build an int32 constant tensor named "a1" from a three-element list.
a1 = tf.constant([1, 2, 3], dtype=tf.int32, name="a1")
print(a1)  # prints: Tensor("a1:0", shape=(3,), dtype=int32)
print(type(a1))  # prints: <class 'tensorflow.python.framework.ops.Tensor'>
# Another constant: a float32 tensor filled with zeros.
tf.zeros(shape=[3, 2], dtype=tf.float32)  # shape 3 x 2
```
常量定義後值不可變
2、變量 tf.Variable 和 tf.Session()
```python
# The second positional parameter of tf.Variable is `trainable`, not the
# dtype, so the float32 conversion is done explicitly with tf.cast.
v1 = tf.Variable(tf.cast(a1 ** 2, tf.float32), name="v1")
# Op that initializes all global variables; nothing happens until it is run.
init = tf.global_variables_initializer()
with tf.Session() as session:
    session.run(init)  # variables must be initialized before they are read
    print(session.run(v1))  # a1 is [1, 2, 3], so this prints [1. 4. 9.]
```
3、占位符 tf.placeholder()
X = tf.placeholder(dtype=tf.float32, shape=[144, 10], name='X')  # float32 placeholder, shape 144 x 10
```python
x = tf.placeholder(tf.int64, name='x')  # define a placeholder; its value is supplied at run time
# The context manager closes the session even if run() raises.
with tf.Session() as sess:
    # feed_dict maps the placeholder object itself to the value used for this run.
    print(sess.run(2 * x, feed_dict={x: 3}))  # prints 6
```
4、變量和占位符的差別
1、tf.Variable 在聲明時需要指定初始化值
tf.placeholder, 不必指定初始值,而在Session.run 中使用字典傳遞,可以了解占位符就是一個通道或形參,用來傳遞資料。
2、 使用中,占位符通常用來傳遞訓練樣本資料,變量通常用來保持中間變量等。
其他常用簡單函數
tensorflow 功能豐富,具體的需要參考其api。 這裡簡單整理一些常用的函數。
1、矩陣乘法
tf.matmul(W,X)
2、加法
tf.add()
3、激活函數
- tf.nn.relu(features, name=None)
- tf.nn.relu6(features, name=None)
- tf.nn.softplus(features, name=None)
- tf.nn.dropout(x, keep_prob, noise_shape=None, seed=None, name=None)
- tf.nn.bias_add(value, bias, name=None)
- tf.sigmoid(x, name=None)
- tf.tanh(x, name=None)
4、交叉熵損失函數
tf.nn.sigmoid_cross_entropy_with_logits( _sentinel=None, labels=None, logits=None,name=None)
$$-\left( y^{(i)} \log \sigma(z^{(i)}) + (1-y^{(i)}) \log\left(1-\sigma(z^{(i)})\right) \right) \tag{2}$$
其他損失函數: 可以參考api
5、獨熱編碼
tf.one_hot(labels, depth, axis)
代碼示例
用tensorflow 定義一個簡單的神經網絡
```python
import math
import numpy as np
import h5py
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.python.framework import ops
# 代價函數
def compute_cost(Z3, Y):
    """Build the mean softmax cross-entropy cost.

    Both inputs are transposed before being handed to
    ``tf.nn.softmax_cross_entropy_with_logits``.

    Args:
        Z3: output of the output layer's linear unit.
        Y: labels, laid out like ``Z3`` (presumably classes x examples, as
            produced by ``create_placeholders`` -- confirm against callers).

    Returns:
        Tensor of the cost function.
    """
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
        logits=tf.transpose(Z3),
        labels=tf.transpose(Y),
    )
    return tf.reduce_mean(per_example_loss)
# 對樣本标簽資料進行獨熱編碼
def one_hot_matrix(labels, num_classes):
    """One-hot encode a vector of class labels.

    The layout of the result is determined by the ``axis`` argument of
    ``tf.one_hot``; ``axis=0`` is used here, so each column encodes one label.

    Example: labels = [1, 3, 0, 2, 1], num_classes = 4 gives

        0 0 1 0 0
        1 0 0 0 1
        0 0 0 1 0
        0 1 0 0 0

    Args:
        labels: class indices, e.g. [1, 3, 0, 2, 1].
        num_classes: number of classes, used as the one-hot depth.

    Returns:
        The encoded matrix as evaluated by ``Session.run``.
    """
    depth = tf.constant(num_classes, name="num_classes")
    encoded = tf.one_hot(labels, depth, axis=0)
    # The context manager closes the session even if evaluation fails.
    with tf.Session() as sess:
        return sess.run(encoded)
def create_placeholders(n_x, n_y):
    """Create the input and output placeholders of the network.

    Args:
        n_x: size of the first dimension of the input placeholder.
        n_y: size of the first dimension of the label placeholder.

    Returns:
        Tuple ``(X, Y)`` of float32 placeholders with shapes ``(n_x, None)``
        and ``(n_y, None)``; the second dimension is left unspecified.
    """
    inputs = tf.placeholder(dtype=tf.float32, shape=(n_x, None), name="X")
    targets = tf.placeholder(dtype=tf.float32, shape=(n_y, None), name="Y")
    return inputs, targets
# 前向傳播
def forward_propagation(X, parameters):
    """Build the forward pass LINEAR -> RELU -> LINEAR -> RELU -> LINEAR.

    The two hidden layers use ``tf.nn.relu``. No softmax is applied here:
    the raw output of the last linear unit is returned.

    Args:
        X: input tensor; it is left-multiplied by ``W1``.
        parameters: mapping with the keys "W1", "b1", "W2", "b2", "W3", "b3".

    Returns:
        Z3, the output of the last linear unit.
    """
    # Look up every parameter first so a missing key fails before any op is built.
    W1, b1, W2, b2, W3, b3 = [
        parameters[key] for key in ("W1", "b1", "W2", "b2", "W3", "b3")
    ]

    hidden = X
    for weight, bias in ((W1, b1), (W2, b2)):
        hidden = tf.nn.relu(tf.add(tf.matmul(weight, hidden), bias))

    return tf.add(tf.matmul(W3, hidden), b3)
# 初始化參數
def initialize_parameters():
    """Create the weight and bias variables of the network.

    The shapes depend on the network structure and on the data dimensions;
    the values hard-coded here are only an example. Every variable, biases
    included, is created with a Xavier initializer seeded with 1.

    Returns:
        Dict mapping "W1", "b1", "W2", "b2", "W3", "b3" to their variables.
    """
    tf.set_random_seed(1)  # seed

    # (variable name, shape), listed in creation order.
    variable_shapes = (
        ("W1", [25, 12288]),
        ("b1", [25, 1]),
        ("W2", [12, 25]),
        ("b2", [12, 1]),
        ("W3", [6, 12]),
        ("b3", [6, 1]),
    )

    parameters = {}
    for var_name, var_shape in variable_shapes:
        parameters[var_name] = tf.get_variable(
            var_name,
            var_shape,
            initializer=tf.contrib.layers.xavier_initializer(seed=1),
        )
    return parameters
def model(X_train, Y_train, X_test, Y_test, learning_rate=0.0001,
          num_epochs=3500, minibatch_size=32, print_cost=True):
    """Build the network, train it with mini-batch gradient descent, and
    report train/test accuracy.

    Relies on ``random_mini_batches``, which is not defined in this block and
    must be available in the enclosing module.

    Args:
        X_train: training inputs of shape (n_x, m): feature dimension by
            number of examples.
        Y_train: training labels; the size of its first dimension is used
            as n_y.
        X_test: test inputs, fed to the same placeholder as ``X_train``.
        Y_test: test labels, fed to the same placeholder as ``Y_train``.
        learning_rate: learning rate of the gradient descent optimizer.
        num_epochs: number of passes over the training set.
        minibatch_size: batch size handed to ``random_mini_batches``.
        print_cost: when truthy, print the cost every 100 epochs and record
            it every 5 epochs for the plot.

    Returns:
        The trained parameters as evaluated by ``sess.run``.
    """
    ops.reset_default_graph()  # start from an empty default graph
    tf.set_random_seed(1)
    seed = 2
    (n_x, m) = X_train.shape  # n_x: feature dimension, m: number of examples
    n_y = Y_train.shape[0]
    costs = []  # recorded epoch costs; kept separate from the `cost` tensor below

    # Placeholders, parameters and the forward graph.
    X, Y = create_placeholders(n_x, n_y)
    parameters = initialize_parameters()
    Z3 = forward_propagation(X, parameters)

    # Cost node of the computation graph.
    cost = compute_cost(Z3, Y)

    # Optimizer.
    optimizer = tf.train.GradientDescentOptimizer(
        learning_rate=learning_rate).minimize(cost)

    # Variable initialization op.
    init = tf.global_variables_initializer()

    # Number of full mini-batches per epoch. It does not change between
    # epochs, and is kept at >= 1 so the averaging below cannot divide by
    # zero when m < minibatch_size.
    num_minibatches = max(m // minibatch_size, 1)

    with tf.Session() as sess:
        sess.run(init)

        # Training loop.
        for epoch in range(num_epochs):
            epoch_cost = 0.  # cost accumulated over this epoch
            seed += 1  # a new seed each epoch is passed to random_mini_batches
            minibatches = random_mini_batches(
                X_train, Y_train, minibatch_size, seed)

            for minibatch_X, minibatch_Y in minibatches:
                _, minibatch_cost = sess.run(
                    [optimizer, cost],
                    feed_dict={X: minibatch_X, Y: minibatch_Y})
                epoch_cost += minibatch_cost / num_minibatches

            if print_cost and epoch % 100 == 0:
                print("第 %i次疊代後的代價: %f" % (epoch, epoch_cost))
            if print_cost and epoch % 5 == 0:
                costs.append(epoch_cost)

        # NOTE(review): costs are recorded every 5 epochs, while the x-axis
        # label says "per tens" -- confirm which one is intended.
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        plt.xlabel('iterations (per tens)')
        plt.title("Learning rate =" + str(learning_rate))
        plt.show()

        # Fetch the trained parameter values from the session.
        parameters = sess.run(parameters)
        print("Parameters have been trained!")

        # tf.argmax is called without an axis argument on both tensors.
        correct_prediction = tf.equal(tf.argmax(Z3), tf.argmax(Y))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

        print("Train Accuracy:", accuracy.eval({X: X_train, Y: Y_train}))
        print("Test Accuracy:", accuracy.eval({X: X_test, Y: Y_test}))

    return parameters
```
參考
https://www.tensorflow.org/api_docs/python/tf