01 - Welcome to the course!
02 - ——————— Part 1 - Artificial Neural Networks ———————
03 - ANN Intuition
002 Plan of Attack
003 The Neuron
04 - Building an ANN
001 Business Problem Description
Dataset (records various metrics for the customers of a fictional bank; the target is whether the customer left the bank)
003 Building an ANN
# %% part1
import numpy as np
import pandas as pd
import tensorflow as tf
print(tf.__version__) # video: '2.2.0'
# dataset
dataset = pd.read_csv('Churn_Modelling.csv')
X = dataset.iloc[:, 3:-1].values # all rows; every column except the first 3 and the last
y = dataset.iloc[:, -1].values # all rows, last column
# encoding categorical data
# (gender is encoded from [female, male] to [0, 1])
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
X[:, 2] = le.fit_transform(X[:, 2])
print(X)
# (geography is one-hot encoded from [france, spain, ...] to [[0,0,1,...],[1,0,0,...],...])
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct = ColumnTransformer(
    # one-hot encode the column at index 1 of the input data
transformers=[('encoder', OneHotEncoder(), [1])],
remainder='passthrough'
)
X = np.array(ct.fit_transform(X))
print(X)
# split train & test dataset
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
# feature scaling - puts the values of different features on a common scale (StandardScaler standardizes to zero mean and unit variance)
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
# fit_transform combines fit and transform: it learns the scaling parameters and applies the transformation in one step.
X_train = sc.fit_transform(X_train)
# transform applies the operation (standardization, dimensionality reduction, etc., depending on the tool - PCA, StandardScaler, ...) using the parameters already learned by fit.
X_test = sc.transform(X_test)
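# A tiny illustration of the fit/transform distinction (not course code; the demo_* names are made up):
# fit_transform learns the mean and std from the data it is given, transform reuses them.
demo_sc = StandardScaler()
demo_train = np.array([[1.0], [2.0], [3.0]])
print(demo_sc.fit_transform(demo_train)) # learns mean=2, std~0.816, then scales
print(demo_sc.transform([[4.0]])) # reuses the training mean/std -> ~2.449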
# %% part2 - build the ANN
ann = tf.keras.models.Sequential()
'''
tf.keras.layers.Dense(
    units, # positive integer, dimensionality of the output space
    activation=None, # activation function; linear if not specified
    use_bias=True, # boolean, whether the layer uses a bias vector
    kernel_initializer='glorot_uniform', # initializer for the kernel weights matrix
    bias_initializer='zeros', # initializer for the bias vector
    kernel_regularizer=None, # regularizer function applied to the kernel weights matrix
    bias_regularizer=None, # regularizer function applied to the bias vector
    activity_regularizer=None, # regularizer function applied to the output of the layer (its "activation")
    kernel_constraint=None, # constraint function applied to the kernel weights matrix
    bias_constraint=None, **kwargs # constraint function applied to the bias vector
)
'''
# input layer + first hidden layer (fully connected)
ann.add(
tf.keras.layers.Dense(units=6, activation='relu')
)
# second hidden layer
ann.add(
tf.keras.layers.Dense(units=6, activation='relu')
)
# output layer
ann.add(
tf.keras.layers.Dense(units=1, activation='sigmoid')
)
# %% part3 - train ANN
ann.compile(
    optimizer='adam', # optimizer
    loss='binary_crossentropy', # loss function
    metrics=['accuracy'] # evaluation metrics
)
ann.fit(X_train, y_train, batch_size=32, epochs=100)
# %% predicting
print(
    ann.predict(
        # after encoding: geography one-hot (3 cols), credit score, gender, age, tenure,
        # balance, number of products, has a card?, active member?, estimated salary
        # sc.transform expects a 2D array, hence the nested brackets
        sc.transform([[1, 0, 0, 600, 1, 40, 3, 60000, 2, 1, 1, 50000]])
    )
)
y_pred = ann.predict(X_test)
y_pred = (y_pred > 0.5)
print(
    np.concatenate(
        (y_pred.reshape(len(y_pred), 1),
         y_test.reshape(len(y_test), 1)),
        axis=1
    )
)
# %% confusion matrix - compares the predicted classes with the actual values; the classification accuracy can be read from the matrix
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
'''
sklearn's layout (rows = actual class, columns = predicted class):
[[TN, FP]
 [FN, TP]]
'''
print(accuracy_score(y_test, y_pred))
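# A quick sanity check of the layout (illustrative only): actual labels [0, 0, 1, 1]
# vs predictions [0, 1, 1, 1] give 1 TN, 1 FP, 0 FN, 2 TP.
print(confusion_matrix([0, 0, 1, 1], [0, 1, 1, 1]))
# [[1 1]
#  [0 2]]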
05 - ——————— Part 2 - Convolutional Neural Networks ———————
07 - Building a CNN
# %% import
import tensorflow as tf
import keras
from keras.preprocessing.image import ImageDataGenerator
# video: tf 2.2.0, keras 2.3.1
print(tf.__version__, keras.__version__)
# %% part1 data preprocessing
train_datagen = ImageDataGenerator( # image augmentation generator
    rescale=1. / 255, # rescale pixel values from 0-255 to 0-1
    # the augmentation transforms follow
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True
)
train_set = train_datagen.flow_from_directory( # read images from the directory and apply the augmentation
'dataset/training_set',
target_size=(64, 64),
batch_size=32,
    class_mode='binary' # binary classification
)
# apply the same feature scaling to the test set
test_datagen = ImageDataGenerator( # only rescaling - no augmentation for the test set
    rescale=1. / 255 # keep the scaling consistent with the training set
)
test_set = test_datagen.flow_from_directory(
    'dataset/test_set',
    target_size=(64, 64),
    batch_size=32,
    class_mode='binary' # binary classification
)
# %% part2 building the CNN
cnn = tf.keras.models.Sequential()
cnn.add(tf.keras.layers.Conv2D(
    filters=32, # number of filters
kernel_size=3,
activation='relu',
input_shape=[64, 64, 3]
))
# max pooling (outputs the maximum of each window)
# pool size 2x2, stride 2
cnn.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))
cnn.add(tf.keras.layers.Conv2D(
    filters=32, # number of filters
kernel_size=3,
activation='relu',
))
# max pooling (outputs the maximum of each window)
# pool size 2x2, stride 2
cnn.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))
# flattening
cnn.add(tf.keras.layers.Flatten())
# fully connected layer
cnn.add(
tf.keras.layers.Dense(units=128, activation='relu')
)
# output layer
cnn.add(
tf.keras.layers.Dense(units=1, activation='sigmoid')
)
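# A toy check of the pooling arithmetic above (illustrative, not part of the model):
# a 2x2 max pool with stride 2 keeps the largest value of each non-overlapping
# 2x2 window, halving each spatial dimension.
demo = tf.constant([[1., 2., 5., 6.],
                    [3., 4., 7., 8.],
                    [9., 10., 13., 14.],
                    [11., 12., 15., 16.]])
demo = tf.reshape(demo, [1, 4, 4, 1]) # (batch, height, width, channels)
pooled = tf.keras.layers.MaxPool2D(pool_size=2, strides=2)(demo)
print(tf.reshape(pooled, [2, 2])) # [[4. 8.] [12. 16.]] - the max of each window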
# %% part3 training the CNN
cnn.compile(
    optimizer='adam', # optimizer
    loss='binary_crossentropy', # loss function
    metrics=['accuracy'] # evaluation metrics
)
cnn.fit(x=train_set, validation_data=test_set, epochs=25)
# %% part4 making a single prediction
import numpy as np
from keras.preprocessing import image
test_image = image.load_img('dataset/single_prediction/cat_or_dog_1.jpg', target_size=(64, 64))
test_image = image.img_to_array(test_image) # convert to a numpy array
# add an extra batch dimension
test_image = np.expand_dims(test_image, axis=0)
result = cnn.predict(test_image)
print(train_set.class_indices) # mapping between class names and indices
if result[0][0] > 0.5: # threshold the sigmoid output at 0.5
prediction = 'dog'
else:
prediction = 'cat'
print(prediction)
08 - ——————— Part 3 - Recurrent Neural Networks ———————
09 - RNN Intuition
002 Plan of attack
003 The idea behind Recurrent Neural Networks
- Weights are a neural network's long-term memory - temporal lobe, like an ANN
- Image recognition - occipital lobe, like a CNN
- Short-term memory - frontal lobe, like an RNN
004 The Vanishing Gradient Problem
005 LSTMs
006 Practical intuition
007 EXTRA LSTM Variations - the 3 major variants
Some work on improving the LSTM
10 - Building a RNN
002 Building a RNN - Step 1
003 Building a RNN - Step 2
# Recurrent Neural Network
# %% Part 1 - Data Preprocessing
# import library
import numpy as np
import matplotlib
matplotlib.use('tkagg') # the backend must be selected before pyplot is imported
import matplotlib.pyplot as plt
import pandas as pd
# import train set
dataset_train = pd.read_csv('dataset/Google_Stock_Price_Train.csv')
training_set = dataset_train.iloc[:, 1:2].values # all rows, 2nd column ('Open'), kept 2-dimensional
# %% Part 2-Building the RNN
# %% Part 3-Making the predictions and visualising the results
004 Building a RNN - Step 3
# Recurrent Neural Network
# %% Part 1 - Data Preprocessing
# import library
...
# Feature Scaling
from sklearn.preprocessing import MinMaxScaler
# feature_range: the desired range of the transformed data
sc = MinMaxScaler(feature_range=(0, 1))
training_set_scaled = sc.fit_transform(training_set)
# %% Part 2-Building the RNN
# %% Part 3-Making the predictions and visualising the results
005 Building a RNN - Step 4
# Recurrent Neural Network
# %% Part 1 - Data Preprocessing
# import library
...
# import train set
...
# Feature Scaling
...
# Creating a data structure with 60 timesteps and 1 output - look back at the previous 60 days to learn/predict the current value
X_train = []
y_train = []
for i in range(60, 1258): # training_set.shape is (1258, 1)
    X_train.append(training_set_scaled[i - 60:i, 0]) # the previous 60 values
    y_train.append(training_set_scaled[i, 0]) # the value right after the window - the prediction target
X_train, y_train = np.array(X_train), np.array(y_train)
# %% Part 2-Building the RNN
# %% Part 3-Making the predictions and visualising the results
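# A toy illustration of the windowing in Step 4 (not course code): with a 5-value
# series and 3 timesteps, each X row is a sliding window, y the value right after it.
demo_series = np.arange(5) # [0 1 2 3 4]
demo_X = [demo_series[i - 3:i] for i in range(3, 5)]
demo_y = [demo_series[i] for i in range(3, 5)]
print(np.array(demo_X)) # [[0 1 2] [1 2 3]]
print(np.array(demo_y)) # [3 4]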
006 Building a RNN - Step 5
# Reshaping to (number of samples, number of timesteps, number of indicators)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
007 Building a RNN - Step 6
# %% Part 2-Building the RNN
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
# Initialising the rnn
regressor = Sequential() # a regressor predicts continuous values; a classifier predicts a class
008 Building a RNN - Step 7
# Adding the first LSTM layer and some Dropout regularisation
regressor.add(LSTM(
    # number of units (neurons)
    units=50,
    # whether to return the full output sequence: True when another LSTM is stacked on top, False otherwise
    return_sequences=True,
    # input shape: (timesteps, input dimension per timestep)
    # the per-timestep input size is the dimensionality of each x; for text it would be the embedding dimension of one token
    input_shape=(X_train.shape[1], 1)
))
regressor.add(Dropout(0.2)) # drop 20% of the neurons - regularization
009 Building a RNN - Step 8
# Adding a second LSTM layer and some Dropout regularisation
regressor.add(LSTM(units=50, return_sequences=True)) # the input shape is inferred from the LSTM above
regressor.add(Dropout(0.2))
# Adding a third LSTM layer and some Dropout regularisation
regressor.add(LSTM(units=50, return_sequences=True))
regressor.add(Dropout(0.2))
# Adding a fourth LSTM layer and some Dropout regularisation
# regressor.add(LSTM(units=50, return_sequences=False)) - return_sequences defaults to False
regressor.add(LSTM(units=50))
regressor.add(Dropout(0.2))
010 Building a RNN - Step 9
# Adding the output layer
regressor.add(Dense(units=1))
011 Building a RNN - Step 10
# Compiling the RNN - Adam optimizer, mean squared error loss
regressor.compile(optimizer='adam', loss='mean_squared_error')
012 Building a RNN - Step 11
# Fitting the RNN to the Training set
regressor.fit(X_train, y_train, epochs=100, batch_size=32)
013 Building a RNN - Step 12 …
# %% Part 3-Making the predictions and visualising the results
# Getting the real stock price of 2017
dataset_test = pd.read_csv('dataset/Google_Stock_Price_Test.csv')
real_stock_price = dataset_test.iloc[:, 1:2].values # all rows, 2nd column ('Open')
# Getting the predicted stock price of 2017
dataset_total = pd.concat((dataset_train['Open'], dataset_test['Open']), axis=0) # concatenate the train and test 'Open' columns
inputs = dataset_total[len(dataset_total) - len(dataset_test) - 60:].values # the test period plus the 60 days before it
inputs = inputs.reshape(-1, 1)
inputs = sc.transform(inputs) # scale with the scaler fitted on the training set
X_test = []
for i in range(60, 80):
X_test.append(inputs[i - 60:i, 0])
X_test = np.array(X_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
predicted_stock_price = regressor.predict(X_test)
predicted_stock_price = sc.inverse_transform(predicted_stock_price) # undo the earlier scaling
# visualising the results
plt.plot(real_stock_price, color='red', label='Real Google Stock Price')
plt.plot(predicted_stock_price, color='blue', label='Predicted Google Stock Price')
plt.title('Google Stock Price Prediction')
plt.xlabel('Time')
plt.ylabel('Google Stock Price')
plt.legend()
plt.show()
11 - Evaluating and Improving the RNN
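A minimal sketch of one common evaluation for this regressor (my addition, reusing the Part 3 variables above): the root mean squared error between the real and predicted prices.
from sklearn.metrics import mean_squared_error
rmse = np.sqrt(mean_squared_error(real_stock_price, predicted_stock_price))
print(rmse) # average prediction error, in the same units as the stock price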
12 - ———————— Part 4 - Self Organizing Maps ————————
13 - SOMs Intuition
What is a self-organizing map?
A particularly interesting class of unsupervised systems is based on competitive learning, in which the output neurons compete to be activated, with the result that only one neuron is active at any one time.
The activated neuron is called the winner-takes-all neuron.
This competition can be implemented through lateral inhibitory connections (negative feedback paths) between the neurons.
As a result the neurons are forced to organize themselves; such a network is called a Self Organizing Map (SOM).
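A minimal NumPy sketch of the winner-takes-all step (an illustration of the idea, not MiniSom's actual internals): the winning neuron, the best matching unit (BMU), is the one whose weight vector lies closest to the input.
import numpy as np

def find_bmu(weights, x):
    # weights: (rows, cols, n_features) grid of neuron weight vectors
    dists = np.linalg.norm(weights - x, axis=2) # distance of every neuron to the input
    return np.unravel_index(np.argmin(dists), dists.shape) # grid coords of the closest neuron

demo_grid = np.random.rand(10, 10, 15) # toy 10x10 SOM over 15-dimensional inputs
print(find_bmu(demo_grid, np.random.rand(15)))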
001 Plan of attack
002 How do Self-Organizing Maps Work
003 Why revisit K-Means
004 K-Means Clustering (Refresher)
Eliminates decision complexity
005 How do Self-Organizing Maps Learn (Part 1)
006 How do Self-Organizing Maps Learn (Part 2)
007 Live SOM example
008 Reading an Advanced SOM
009 EXTRA K-means Clustering (part 2)
The random initialization trap: a centroid gets initialized far from most points and close to only a few, and then nothing changes from iteration to iteration.
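The usual remedy is k-means++ initialization, which spreads the initial centroids far apart. A minimal scikit-learn sketch with toy data (illustrative, not course code):
import numpy as np
from sklearn.cluster import KMeans

demo_points = np.random.rand(200, 2)
# init='k-means++' picks starting centroids that are far from each other,
# avoiding the random-initialization trap described above
km = KMeans(n_clusters=3, init='k-means++', n_init=10, random_state=0).fit(demo_points)
print(km.cluster_centers_)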
010 EXTRA K-means Clustering (part 3)
14 - Building a SOM
001 How to get the dataset
002 Building a SOM - Step 1
# Self Organizing Map
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Importing the dataset
dataset = pd.read_csv('dataset/Credit_Card_Applications.csv')
X = dataset.iloc[:, :-1].values # all columns except the last
y = dataset.iloc[:, -1].values
# Feature Scaling
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range=(0, 1)) # scale to 0-1
X = sc.fit_transform(X)
003 Building a SOM - Step 2
# Training the SOM
from minisom import MiniSom
# (x, y) are the SOM grid dimensions; sigma is the radius of the neighbourhood function
som = MiniSom(x=10, y=10, input_len=15, sigma=1.0, learning_rate=0.5)
som.random_weights_init(X) # initialize the weights with random samples from X
som.train_random(data=X, num_iteration=100)
004 Building a SOM - Step 3
# Visualizing the results
from pylab import bone, pcolor, colorbar, plot, show
bone() # draw a blank white figure
pcolor(som.distance_map().T) # distance_map returns each neuron's mean inter-neuron distance (MID)
colorbar() # legend bar mapping colors to values
markers = ['o', 's']
colors = ['r', 'g']
for i, x in enumerate(X): # i is the sample index, x the current sample
    w = som.winner(x)
    plot(w[0] + 0.5, w[1] + 0.5,
         # draw a different marker depending on y (circle 'o' or square 's')
         markers[y[i]],
         # color only the marker edge
         markeredgecolor=colors[y[i]],
         markerfacecolor='None', markersize=10, markeredgewidth=2)
show()
005 Building a SOM - Step 4
# Finding the frauds
mappings = som.win_map(X)
frauds = np.concatenate((mappings[(8, 1)], mappings[(6, 8)]), axis=0) # (8,1) and (6,8) are outlier coordinates read off the map plotted above
# undo the feature scaling
frauds = sc.inverse_transform(frauds)
15 - Mega Case Study
"""## Importing the libraries"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
"""## Importing the dataset"""
dataset = pd.read_csv('dataset/Credit_Card_Applications.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values
"""## Feature Scaling"""
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range=(0, 1))
X = sc.fit_transform(X)
"""##Training the SOM"""
from minisom import MiniSom
som = MiniSom(x=10, y=10, input_len=15, sigma=1.0, learning_rate=0.5)
som.random_weights_init(X)
som.train_random(data=X, num_iteration=100)
"""##Visualizing the results"""
from pylab import bone, pcolor, colorbar, plot, show
bone()
pcolor(som.distance_map().T)
colorbar()
markers = ['o', 's']
colors = ['r', 'g']
for i, x in enumerate(X):
w = som.winner(x)
plot(w[0] + 0.5,
w[1] + 0.5,
markers[y[i]],
markeredgecolor=colors[y[i]],
markerfacecolor='None',
markersize=10,
markeredgewidth=2)
show()
"""## Finding the frauds"""
mappings = som.win_map(X) # maps each winning node's coordinates to the list of samples it wins
frauds = np.concatenate((mappings[(6, 8)], mappings[(5, 1)]), axis=0)
frauds = sc.inverse_transform(frauds)
"""##Printing the Fraunch Clients"""
print('Fraud Customer IDs')
for i in frauds[:, 0]:
print(int(i))
"""#Part 2 - Going from Unsupervised to Supervised Deep Learning
##Create Matrix of Features
"""
customers = dataset.iloc[:, 1:].values # all columns except the first (the customer ID)
"""## Create Dependent Variable"""
is_fraud = np.zeros(len(dataset))
for i in range(len(dataset)):
    if dataset.iloc[i, 0] in frauds: # the customer's ID appears in the fraud list
        is_fraud[i] = 1
"""#Part 3 - ANN
### Feature Scaling
"""
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
customers = sc.fit_transform(customers)
"""## Building the ANN
##Import Tensorflow
"""
import tensorflow as tf
print(tf.__version__)
"""## Initializing the ANN"""
ann = tf.keras.models.Sequential()
"""##Adding the input layer and the first hidden layer"""
ann.add(tf.keras.layers.Dense(units=2, activation='relu'))
"""## Adding the output layer"""
ann.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))
"""## Training the ANN
## Compiling the ANN
"""
ann.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
"""## Training the ANN on the Training set"""
ann.fit(customers, is_fraud, batch_size=1, epochs=10)
"""## Predicting test set results"""
y_pred = ann.predict(customers)
y_pred = np.concatenate((dataset.iloc[:, 0:1].values, y_pred), axis=1)
y_pred = y_pred[y_pred[:, 1].argsort()]
print(y_pred)
16 - ——————— Part 5 - Boltzmann Machines ———————
17 - Boltzmann Machine Intuition
001 Plan of attack
002 Boltzmann Machine
003 Energy-Based Models (EBM)
Boltzmann distribution (a sampling distribution) -> Boltzmann machine
005 Restricted Boltzmann Machine
Adding nodes to a full Boltzmann machine makes the number of connections (and the training cost) blow up, so a complete Boltzmann machine is rarely practical; the restricted Boltzmann machine was proposed as a tractable alternative.
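For reference, a sketch of the RBM's energy function (my notation, chosen to match the class in Step 7 below: a is the hidden bias, b the visible bias). The Boltzmann distribution assigns higher probability to low-energy configurations, p(v, h) ~ exp(-E(v, h)).
import torch

def rbm_energy(v, h, W, a, b):
    # E(v, h) = -b.v - a.h - h^T W v ; lower energy = more probable configuration
    return -(v @ b.t()) - (h @ a.t()) - torch.sum((h @ W) * v, dim=1, keepdim=True)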
006 Contrastive Divergence
Contrastive divergence is what allows a Boltzmann machine to learn.
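A sketch of one CD-1 step using the RBM class built in Steps 7-10 below (the course loop in Step 13 runs k=10 such steps; v0 is a batch of visible vectors, names as in that loop):
ph0, h0 = rbm.sample_h(v0) # positive phase: hidden probabilities/samples given the data
_, v1 = rbm.sample_v(h0) # negative phase: reconstruct the visible units
ph1, _ = rbm.sample_h(v1) # hidden probabilities given the reconstruction
rbm.train(v0, v1, ph0, ph1) # W += v0^T.ph0 - v1^T.ph1 (transposed), plus bias updates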
007 Deep Belief Networks
008 Deep Boltzmann Machines
18 - Building a Boltzmann Machine
003 Building a Boltzmann Machine - Introduction
005 Building a Boltzmann Machine - Step 1
# Importing the libraries
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable
# importing the dataset
movies = pd.read_csv('ml-1m/movies.dat', sep='::', header=None, engine='python', encoding='latin-1')
users = pd.read_csv('ml-1m/users.dat', sep='::', header=None, engine='python', encoding='latin-1')
ratings = pd.read_csv('ml-1m/ratings.dat', sep='::', header=None, engine='python', encoding='latin-1')
print(movies.shape, users.shape, ratings.shape)
006 Building a Boltzmann Machine - Step 2
# Preparing the training set and the test set
training_set = pd.read_csv('ml-100k/u1.base', delimiter='\t')
training_set = np.array(training_set, dtype='int')
test_set = pd.read_csv('ml-100k/u1.test', delimiter='\t')
test_set = np.array(test_set, dtype='int')
print(training_set.shape, test_set.shape) # (79999, 4) (19999, 4)
007 Building a Boltzmann Machine - Step 3
# Getting the number of users and movies
nb_users = int(max(max(training_set[:, 0]), max(test_set[:, 0]))) # total number of users
nb_movies = int(max(max(training_set[:, 1]), max(test_set[:, 1]))) # total number of movies
print(nb_users, nb_movies) # 943 1682
008 Building a Boltzmann Machine - Step 4
# Converting the data into an array with users in lines and movies in columns
def convert(data): # data rows: [user_id, movie_id, rating, timestamp]
    new_data = []
    for id_users in range(1, nb_users + 1): # user/movie ids start at 1
        # the movies this user rated, and the ratings given
        id_movies = data[:, 1][data[:, 0] == id_users]
        id_ratings = data[:, 2][data[:, 0] == id_users]
        ratings = np.zeros(nb_movies) # initialize a zero vector of length nb_movies
        ratings[id_movies - 1] = id_ratings # movie indices are 0-based here; record the ratings
        new_data.append(list(ratings))
    # new_data: [[r1, r2, ..., rn],  <- column index = movie id - 1
    #            [r1, r2, ..., rn]]  <- row index = user id - 1
    return new_data
training_set = convert(training_set)
test_set = convert(test_set)
print(len(training_set), len(test_set)) # 943 943
009 Building a Boltzmann Machine - Step 5
# Converting the data into Torch tensors
training_set = torch.FloatTensor(training_set)
test_set = torch.FloatTensor(test_set)
010 Building a Boltzmann Machine - Step 6
# Converting the ratings into binary ratings 1 (Liked) or 0 (Not Liked)
training_set[training_set == 0] = -1 # -1 marks movies the user has not rated
training_set[training_set == 1] = 0 # ratings of 1 and 2 mean "not liked"
training_set[training_set == 2] = 0
training_set[training_set >= 3] = 1 # ratings of 3 and above mean "liked"
test_set[test_set == 0] = -1 # -1 marks movies the user has not rated
test_set[test_set == 1] = 0 # ratings of 1 and 2 mean "not liked"
test_set[test_set == 2] = 0
test_set[test_set >= 3] = 1 # ratings of 3 and above mean "liked"
011 Building a Boltzmann Machine - Step 7
# Creating the architecture of the Neural Network
class RBM():
    def __init__(self, nv, nh):
        '''
        :param nv: number of visible nodes
        :param nh: number of hidden nodes
        '''
        self.W = torch.randn(nh, nv) # weights initialized from a standard normal distribution
        self.a = torch.randn(1, nh) # hidden bias; the size-1 batch dimension is added because the ops below expect 2D tensors
        self.b = torch.randn(1, nv) # visible bias
012 Building a Boltzmann Machine - Step 8
    def sample_h(self, x): # P(h=1 | v): probability of each hidden neuron firing given the visible units
        wx = torch.mm(x, self.W.t()) # matrix multiplication
        activation = wx + self.a.expand_as(wx)
        # h corresponds to latent movie features, v to the ratings
        p_h_given_v = torch.sigmoid(activation)
        return p_h_given_v, torch.bernoulli(p_h_given_v) # Bernoulli sampling of the hidden neurons
013 Building a Boltzmann Machine - Step 9
    def sample_v(self, y): # P(v=1 | h): probability of each visible neuron firing given the hidden units
        wy = torch.mm(y, self.W) # matrix multiplication
        activation = wy + self.b.expand_as(wy)
        # h corresponds to latent movie features, v to the ratings
        p_v_given_h = torch.sigmoid(activation)
        return p_v_given_h, torch.bernoulli(p_v_given_h) # Bernoulli sampling of the visible neurons
014 Building a Boltzmann Machine - Step 10
    def train(self, v0, vk, ph0, phk):
        self.W += (torch.mm(v0.t(), ph0) - torch.mm(vk.t(), phk)).t()
        self.b += torch.sum((v0 - vk), 0) # sum over the batch dimension (dim 0)
        self.a += torch.sum((ph0 - phk), 0)
015 Building a Boltzmann Machine - Step 11
nv = len(training_set[0]) # number of visible nodes = number of movies (1682)
nh = 100
batch_size = 100
rbm = RBM(nv, nh)
016 Building a Boltzmann Machine - Step 12
# Training the RBM
nb_epoch = 10
for epoch in range(1, nb_epoch + 1):
    train_loss = 0
    s = 0.
    for id_user in range(0, nb_users - batch_size, batch_size):
        vk = training_set[id_user:id_user + batch_size]
        v0 = training_set[id_user:id_user + batch_size] # the users' actual ratings
        ph0, _ = rbm.sample_h(v0) # P(hidden = 1) given the users' real ratings
        for k in range(10): # Gibbs sampling - the loop body is completed in Step 13 below
017 Building a Boltzmann Machine - Step 13
# Training the RBM
nb_epoch = 10
for epoch in range(1, nb_epoch + 1):
    train_loss = 0
    s = 0.
    for id_user in range(0, nb_users - batch_size, batch_size):
        vk = training_set[id_user:id_user + batch_size]
        v0 = training_set[id_user:id_user + batch_size] # the users' actual ratings
        ph0, _ = rbm.sample_h(v0) # P(hidden = 1) given the users' real ratings
        for k in range(10): # Gibbs sampling (CD-10)
            # sample the hidden units hk
            _, hk = rbm.sample_h(vk)
            # reconstruct the visible units vk
            _, vk = rbm.sample_v(hk)
            # freeze the -1 (unrated) entries so they are never updated
            vk[v0 < 0] = v0[v0 < 0]
        phk, _ = rbm.sample_h(vk)
        rbm.train(v0, vk, ph0, phk)
        train_loss += torch.mean(torch.abs(v0[v0 >= 0] - vk[v0 >= 0]))
        # update the batch counter
        s += 1.
    print('epoch: ' + str(epoch) + ' loss: ' + str(train_loss / s))
018 Building a Boltzmann Machine - Step 14
# Testing the RBM
test_loss = 0
s = 0.
for id_user in range(nb_users):
    v = training_set[id_user:id_user + 1] # activate the RBM with the training ratings; contains no test answers
    vt = test_set[id_user:id_user + 1] # the ratings the user actually gave in the test set
    # one step of Gibbs sampling
    if len(vt[vt >= 0]) > 0:
        # sample the hidden units
        _, h = rbm.sample_h(v)
        # reconstruct the visible units
        _, v = rbm.sample_v(h)
        test_loss += torch.mean(torch.abs(vt[vt >= 0] - v[vt >= 0]))
        # update the counter
        s += 1.
print('test loss: ' + str(test_loss / s))