This code demonstrates how to build a neural network from scratch, without using any deep-learning Python modules (of course, we use NumPy :) ).
1) Addition and multiplication are represented as gates.
2) Sigmoid and Tanh are implemented as separate activation units.
3) Softmax with cross-entropy loss is used.
Refer to the differentiation of the cross-entropy loss to grasp the details of the Softmax class; a short derivation is given below this list.
4) Complete backpropagation is implemented using the backward() method (similar to PyTorch).
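Derivation referenced in item 3: for one example with scores x and true class y, the softmax probabilities are p_i = exp(x_i) / sum_j exp(x_j) and the cross-entropy loss is L = -log(p_y). Differentiating L with respect to the scores gives

    dL/dx_i = p_i - 1   if i == y
    dL/dx_i = p_i       otherwise

This is exactly what Softmax.diff implements row by row: it subtracts 1 from the predicted probability of the correct class for each example.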
import numpy as np
class MultiplyGate:
def forward(self,W,X):
return np.dot(X,W)
def backward(self,W,X,dZ):
dW = np.dot(X.T,dZ)
dX = np.dot(dZ,W.T)
return dW,dX
class AddGate:
def forward(self,X,b):
return X+b
def backward(self,X,b,dZ):
db = np.dot(np.ones((1,dZ.shape[0])),dZ)
dX = dZ*np.ones_like(X)
return db,dX
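Note on AddGate.backward: because the bias b is broadcast across every row of X in the forward pass, its gradient db is the sum of dZ over the batch dimension, which the dot product with a row vector of ones computes; dX simply passes dZ through unchanged.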
class Sigmoid:
def forward(self,X):
return 1.0/(1.0+np.exp(-X))
def backward(self,X,top_diff):
output = self.forward(X)
return output*(1-output)*top_diff
class Tanh:
def forward(self,X):
return np.tanh(X)
def backward(self,X,top_diff):
output = self.forward(X)
return (1.0-np.square(output))*top_diff
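For reference, the backward methods of the two activation units use the standard derivative identities: if s = sigmoid(x) then ds/dx = s * (1 - s), and if t = tanh(x) then dt/dx = 1 - t^2; each local derivative is multiplied by the incoming gradient top_diff, per the chain rule.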
class Softmax:
def predict(self,X):
exp_scores = np.exp(X)
return exp_scores/np.sum(exp_scores,axis=1,keepdims=True)
def loss(self,X,y):
num_examples = X.shape[0]
probs = self.predict(X)
correct_logprobs = -np.log(probs[range(num_examples),y])
data_loss = np.sum(correct_logprobs)
return (1./num_examples)*data_loss
def diff(self,X,y):
num_examples = X.shape[0]
probs = self.predict(X)
probs[range(num_examples),y] -= 1
return probs
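Before wiring the gates into a model, it can help to sanity-check the backward implementations against numerical gradients. The sketch below is not part of the original code: numerical_grad is a hypothetical helper that estimates gradients by central differences, the test tensors and tolerance are illustrative choices, and the same pattern can be applied to AddGate and Tanh.
# Illustrative sanity check: compare analytic and numerical gradients for MultiplyGate
def numerical_grad(f, x, eps=1e-5):
    # Central-difference estimate of d f / d x, perturbing one element at a time
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        old = x[idx]
        x[idx] = old + eps
        fp = f()
        x[idx] = old - eps
        fm = f()
        x[idx] = old
        grad[idx] = (fp - fm) / (2 * eps)
        it.iternext()
    return grad

X_chk = np.random.randn(4, 3)
W_chk = np.random.randn(3, 2)
dZ_chk = np.random.randn(4, 2)                # upstream gradient of the scalar "loss" sum(out * dZ_chk)
gate_chk = MultiplyGate()
dW_chk, dX_chk = gate_chk.backward(W_chk, X_chk, dZ_chk)
dW_num = numerical_grad(lambda: np.sum(gate_chk.forward(W_chk, X_chk) * dZ_chk), W_chk)
print(np.allclose(dW_chk, dW_num, atol=1e-6))  # expected: True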
class Model:
def __init__(self,layers_dim):
self.W=[]
self.b=[]
for i in range(len(layers_dim)-1):
self.W.append(np.random.randn(layers_dim[i],layers_dim[i+1])/np.sqrt(layers_dim[i]))
self.b.append(np.random.randn(layers_dim[i+1]).reshape(1,layers_dim[i+1]))
def calculate_loss(self,X,y):
mulGate = MultiplyGate()
addGate = AddGate()
layer = Tanh()
softmaxOutput = Softmax()
input = X
for i in range(len(self.W)):
mul = mulGate.forward(self.W[i],input)
add = addGate.forward(mul,self.b[i])
input = layer.forward(add)
return softmaxOutput.loss(input,y)
def predict(self,X):
mulGate = MultiplyGate()
addGate = AddGate()
layer = Tanh()
softmaxOutput = Softmax()
input = X
for i in range(len(self.W)):
mul = mulGate.forward(self.W[i],input)
add = addGate.forward(mul,self.b[i])
input = layer.forward(add)
probs = softmaxOutput.predict(input)
return np.argmax(probs,axis=1)
def train(self, X, y, num_passes=20000, epsilon=0.01, reg_lambda=0.01, print_loss=False):
mulGate = MultiplyGate()
addGate = AddGate()
layer = Tanh()
softmaxOutput = Softmax()
for epoch in range(num_passes):
# Forward propagation
input = X
forward = [(None, None, input)]
for i in range(len(self.W)):
mul = mulGate.forward(self.W[i], input)
add = addGate.forward(mul, self.b[i])
input = layer.forward(add)
forward.append((mul, add, input))
# Back propagation
dtanh = softmaxOutput.diff(forward[len(forward)-1][2], y)
for i in range(len(forward)-1, 0, -1):
dadd = layer.backward(forward[i][1], dtanh)
db, dmul = addGate.backward(forward[i][0], self.b[i-1], dadd)
dW, dtanh = mulGate.backward(self.W[i-1], forward[i-1][2], dmul)
# Add regularization term (bias terms are not regularized)
dW += reg_lambda * self.W[i-1]
# Gradient descent parameter update
self.b[i-1] += -epsilon * db
self.W[i-1] += -epsilon * dW
if print_loss and epoch % 1000 == 0:
print("Loss after iteration %i: %f" %(epoch, self.calculate_loss(X, y)))
import matplotlib.pyplot as plt
import numpy as np
import sklearn
import sklearn.datasets
import sklearn.linear_model
# Generate a dataset and plot it
np.random.seed(0)
X, y = sklearn.datasets.make_moons(200, noise=0.20)
plt.scatter(X[:,0], X[:,1], s=40, c=y, cmap=plt.cm.Spectral)
plt.show()
layers_dim = [2,3,2]
model = Model(layers_dim)
model.train(X, y, num_passes=3000, epsilon=0.01, reg_lambda=0.01, print_loss=True)
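A quick, optional check of how well the trained model fits the training data (this snippet is an addition, using only the predict method defined above):
# Fraction of training examples classified correctly
print("Training accuracy: %.3f" % np.mean(model.predict(X) == y))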
def plot_decision_boundary(pred_func, X, y):
# Set min and max values and give it some padding
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
h = 0.01
# Generate a grid of points with distance h between them
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
# Predict the function value for the whole grid
Z = pred_func(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
# Plot the contour and training examples
plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Spectral)
plot_decision_boundary(lambda x: model.predict(x), X, y)
plt.title("Decision Boundary for hidden layer size 3")
plt.show()
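Since layers_dim is just a list of layer sizes, the same experiment can be repeated for other hidden layer widths; the loop below is an optional variation (the sizes 1, 3, 5, 10 are arbitrary illustrative choices):
# Optional: retrain and plot the decision boundary for a few hidden layer sizes
for hidden_size in [1, 3, 5, 10]:
    model = Model([2, hidden_size, 2])
    model.train(X, y, num_passes=3000, epsilon=0.01, reg_lambda=0.01)
    plot_decision_boundary(lambda x: model.predict(x), X, y)
    plt.title("Decision Boundary for hidden layer size %d" % hidden_size)
    plt.show()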