文章目錄
- 原理
- 實作
  - init
  - Keras
  - Tensorflow
  - numpy
原理
$$\mathrm{logloss}=\frac{1}{n}\sum\left[-y\log\hat{y}-(1-y)\log(1-\hat{y})\right]$$
實作
init
import tensorflow as tf
import numpy as np
from tensorflow import keras  # fixed: was misspelled "tensorlfow", which fails at import time
from tensorflow.keras import Sequential, Model
from tensorflow.keras.layers import Dense, Input, LSTM

# Shared setup for the Keras / TensorFlow / NumPy comparisons below.
tf.random.set_seed(1)  # makes the random initial weights reproducible
rows = 10
columns = 3
# The sklearn and Keras sources both use an epsilon; it keeps log(0) from occurring.
epsilon = 1e-7
learning_rate = 0.01
# dtypes must be consistent here, otherwise the NumPy and Keras results differ;
# float32 is used everywhere.
train_x = np.ones(shape=(rows, columns), dtype="float32")
# First half of the labels is 1, second half is 0.
train_y = np.vstack([
    np.ones(shape=(rows // 2, 1), dtype="float32"),
    np.zeros(shape=(rows // 2, 1), dtype="float32"),
])
# Initial parameters shared by every implementation so their results are comparable.
w = tf.random.normal(shape=(columns, 1), dtype=tf.float32)
b = tf.zeros(shape=(1,), dtype=tf.float32)
def w_init(shape, dtype=tf.float32):
    """Kernel initializer that always yields the shared module-level ``w``.

    ``shape`` and ``dtype`` are accepted so the function can be passed as an
    initializer, but both are ignored: the result is ``w`` as a float32 tensor.
    """
    kernel = tf.convert_to_tensor(w, dtype=tf.float32)
    return kernel
def b_init(shape, dtype=tf.float32):
    """Bias initializer that always yields the shared module-level ``b``.

    ``shape`` and ``dtype`` are accepted so the function can be passed as an
    initializer, but both are ignored: the result is ``b`` as a float32 tensor.
    """
    bias = tf.convert_to_tensor(b, dtype=tf.float32)
    return bias
Keras
# One sigmoid unit over `columns` inputs, seeded with the shared w / b.
model1 = Sequential(
    [
        Input(shape=(columns,)),
        Dense(
            units=1,
            activation="sigmoid",
            kernel_initializer=w_init,
            bias_initializer=b_init,
        ),
    ]
)
# Predictions from the initial (untrained) parameters.
h1 = model1.predict(train_x)
model1.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)
# batch_size == rows, so the single epoch is one full-batch SGD step.
model1.fit(train_x, train_y, batch_size=rows, epochs=1)
# Parameters after the step, for comparison with the other implementations.
w1, b1 = model1.layers[0].weights
Tensorflow
# One manual gradient-descent step with tf.GradientTape, starting from the
# shared initial w / b.
x = tf.Variable(train_x, dtype=tf.float32)
y = tf.Variable(train_y, dtype=tf.float32)
# persistent=True because gradient() is called twice on the same tape below.
with tf.GradientTape(persistent=True) as tape:
    # w and b are plain tensors, not Variables, so they must be watched explicitly.
    tape.watch([w, b])
    y_pred = tf.sigmoid(tf.matmul(x, w) + b)
    # Per-sample log loss; epsilon keeps the logs away from log(0).
    per_sample = -1 * y * tf.math.log(y_pred + epsilon) - (1 - y) * tf.math.log(1 - y_pred + epsilon)
    # Reduce to the batch mean (the 1/n in the log-loss formula). Without this,
    # tape.gradient() would differentiate the *sum* over samples and the step
    # would be `rows` times too large.
    loss = tf.reduce_mean(per_sample)
dw2 = tape.gradient(target=loss, sources=w)
db2 = tape.gradient(target=loss, sources=b)
w2 = w - dw2 * learning_rate
b2 = b - db2 * learning_rate
numpy
epochs = 1  # number of full-batch update steps performed by the loop below
def sigmoid(x):
    """Return the element-wise logistic function 1 / (1 + e^(-x)) of ``x``."""
    denominator = 1.0 + np.exp(-x)
    return 1.0 / denominator
# Closed-form gradient descent in NumPy, starting from the shared initial w / b.
# Convert the TF tensors to ndarrays once so the loop does not mix tensor and
# ndarray arithmetic.
w3 = np.asarray(w)
b3 = np.asarray(b)
h3 = None
for epoch in range(epochs):
    # Forward pass: predicted probabilities.
    h3 = sigmoid(np.dot(train_x, w3) + b3)
    loss = -1 * np.sum(train_y * np.log(h3) + (1 - train_y) * np.log(1 - h3)) / rows
    print(f"loss: {loss}")
    # For sigmoid + log loss, dL/dz simplifies to (h - y). The loss is a mean
    # over `rows`, so the gradients carry the same 1/rows factor.
    residual = h3 - train_y
    dw3 = np.dot(train_x.T, residual) / rows
    db3 = np.dot(np.ones(shape=(1, rows), dtype="float32"), residual) / rows
    w3 = w3 - dw3 * learning_rate
    b3 = b3 - db3 * learning_rate
w3
b3
epochs = 1  # number of full-batch update steps performed by the loop below
def sigmoid(x):
    """Logistic function applied element-wise: 1 / (1 + exp(-x))."""
    exp_neg = np.exp(-x)
    return 1 / (exp_neg + 1)
# NumPy gradient descent on the epsilon-smoothed log loss
#   L = mean(-y*log(h + eps) - (1 - y)*log(1 - h + eps)),
# starting from the shared initial w / b.
# Convert the TF tensors to ndarrays once so the loop does not mix tensor and
# ndarray arithmetic.
w4 = np.asarray(w)
b4 = np.asarray(b)
h4 = None
for epoch in range(epochs):
    # Forward pass: predicted probabilities.
    h4 = sigmoid(np.dot(train_x, w4) + b4)
    # Report the same epsilon-smoothed loss that the gradient below differentiates.
    loss = -1 * np.sum(train_y * np.log(h4 + epsilon) + (1 - train_y) * np.log(1 - h4 + epsilon)) / rows
    print(f"loss: {loss}")
    # dL/dh per sample. Algebraically equal to
    # (h - y - 2*y*eps + eps) / ((h + eps) * (1 - h + eps)).
    dh4 = -1 * train_y / (h4 + epsilon) + (1 - train_y) / (1 - h4 + epsilon)
    # Chain rule through the sigmoid: dh/dz = h * (1 - h). Without this factor
    # the result is not the gradient with respect to w and b.
    dz4 = dh4 * h4 * (1 - h4)
    # The loss is a mean over `rows`, so the gradients carry the 1/rows factor.
    dw4 = np.dot(train_x.T, dz4) / rows
    db4 = np.dot(np.ones(shape=(1, rows), dtype="float32"), dz4) / rows
    w4 = w4 - dw4 * learning_rate
    b4 = b4 - db4 * learning_rate
w4
b4