Lab-1: Apply MP neuron and perceptron to solve a binary classification problem

# OR gate truth table
import numpy as np
import pandas as pd

data = {'Input1': [1, 1, 0, 0], 'Input2': [1, 0, 1, 0], 'Output': [1, 1, 1, 0]}
df_OR = pd.DataFrame(data)
df_OR
inputs = df_OR.iloc[:, 0:2]
output = df_OR.iloc[:, 2]

# Single-layer perceptron -- OR gate
def unit(a):
    # unit step activation
    if a > 0:
        return 1
    else:
        return 0

def percep(x, w, b):
    # weighted sum followed by the step activation
    v = np.dot(x, w) + b
    return unit(v)

def orgate(x):
    # hand-picked weights and bias that realise OR
    w = np.array([2, 2])
    b = -1
    return percep(x, w, b)

t1 = np.array(inputs.iloc[0])
t2 = np.array(inputs.iloc[1])
t3 = np.array(inputs.iloc[2])
t4 = np.array(inputs.iloc[3])
print('OR({},{}) = {}'.format(1, 1, orgate(t1)))
print('OR({},{}) = {}'.format(1, 0, orgate(t2)))
print('OR({},{}) = {}'.format(0, 1, orgate(t3)))
print('OR({},{}) = {}'.format(0, 0, orgate(t4)))

# McCulloch-Pitts neuron -- OR gate
def func_or(a):
    # fires once the input sum reaches the threshold of 1
    if a >= 1:
        return 1
    else:
        return 0

def orgate(x):
    v = np.sum(x)
    return func_or(v)

t1 = np.array(inputs.iloc[0])
t2 = np.array(inputs.iloc[1])
t3 = np.array(inputs.iloc[2])
t4 = np.array(inputs.iloc[3])
print('OR({},{}) = {}'.format(1, 1, orgate(t1)))
print('OR({},{}) = {}'.format(1, 0, orgate(t2)))
print('OR({},{}) = {}'.format(0, 1, orgate(t3)))
print('OR({},{}) = {}'.format(0, 0, orgate(t4)))
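The weights above are hand-picked rather than learned. A minimal sketch of the perceptron learning rule, which learns them from the same truth table, is given below; the learning rate lr and the epoch count are arbitrary illustrative choices, not part of the lab listing.

# Perceptron learning rule (sketch): start from zero weights and nudge them
# whenever a training example is misclassified.
X = np.array(inputs)
t = np.array(output)
w = np.zeros(2)
b = 0.0
lr = 0.1
for epoch in range(10):
    for xi, ti in zip(X, t):
        y_hat = unit(np.dot(xi, w) + b)
        w += lr * (ti - y_hat) * xi
        b += lr * (ti - y_hat)
print('learned weights:', w, 'bias:', b)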




Lab-2: Apply sigmoid neuron to solve a real-world classification / regression problem

import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.utils import to_categorical

# Stack of sigmoid layers with a softmax output for the 3 iris classes
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(512, activation='sigmoid', input_shape=(4,)),
    tf.keras.layers.Dense(128, activation='sigmoid'),
    tf.keras.layers.Dense(32, activation='sigmoid'),
    tf.keras.layers.Dense(3, activation='softmax')
])

iris = datasets.load_iris()
X = iris.data
y = iris.target

# Create training and test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)
train_labels = to_categorical(y_train)
test_labels = to_categorical(y_test)

# The model has to be compiled before it can be fitted
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, train_labels, epochs=50, batch_size=40)
loss, accuracy = model.evaluate(X_test, test_labels)
y_predict = model.predict(X_test)
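To turn the softmax probabilities back into class labels, a small follow-up that only uses the variables already defined in the listing above:

# Convert softmax probabilities to class indices and verify the accuracy by hand
predicted_classes = np.argmax(y_predict, axis=1)
print('test accuracy (manual):', np.mean(predicted_classes == y_test))
print('test accuracy (Keras): ', accuracy)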


Lab-3: Build a FFN network to solve a multi-class classification problem

import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Flatten,Activation,Dropout
iris = datasets.load_iris()
X = iris.data
y = iris.target
#
# Create training and test split
#
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)
train_labels = to_categorical(y_train)
test_labels = to_categorical(y_test)
model3 = Sequential([
    Dense(512, activation='relu', input_shape=X_train.shape[1:]),
    tf.keras.layers.BatchNormalization(),
    Dense(128, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    Dense(32, activation='relu'),
    Dense(3, activation='softmax')
])
model3.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])
model3.fit(X_train,train_labels,epochs=50,batch_size=40)
loss,accuracy=model3.evaluate(X_test,test_labels)
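For a per-class view of the same result, a hedged addition using scikit-learn's classification_report (not part of the lab listing, just a convenient check):

from sklearn.metrics import classification_report

# Per-class precision/recall/F1 for the feed-forward network
y_pred3 = np.argmax(model3.predict(X_test), axis=1)
print(classification_report(y_test, y_pred3, target_names=iris.target_names))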



Lab-4: Implement linear regression with stochastic gradient descent.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import style
style.use("fivethirtyeight")
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
data = pd.read_csv('/content/Salary_Data (2).csv')
data
x=np.array(data['YearsExperience'])
y=np.array(data['Salary'])
l=len(x)
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,random_state=6)
lx=len(x_train)
m=0.1
c=0.5
alpha=0.01
n=5000
# Stochastic gradient descent: pick a random training example at each step
# and accumulate its gradient contribution for this pass
for i in range(n):
    slope = 0
    intercept = 0
    for j in range(lx):
        random_index = np.random.randint(lx)
        intercept = intercept + ((m * x_train[random_index:random_index + 1] + c) - y_train[random_index:random_index + 1])
        slope = slope + ((m * x_train[random_index:random_index + 1] + c) - y_train[random_index:random_index + 1]) * x_train[random_index:random_index + 1]
    c = c - alpha * (intercept / lx)
    m = m - alpha * (slope / lx)
print(f"slope is {m}")
print(f"intercept is {c}")
y_pred=np.dot(m[0],x_test)+c[0]
y_pred
from sklearn.metrics import mean_squared_error,r2_score,mean_absolute_error
from sklearn.metrics import explained_variance_score
print(mean_squared_error(y_test,y_pred))
print(np.sqrt(mean_squared_error(y_test,y_pred)))
print(mean_absolute_error(y_test,y_pred))
print(explained_variance_score(y_test,y_pred))
print(r2_score(y_test,y_pred))
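As a cross-check that is not part of the lab listing, the closed-form least-squares fit from scikit-learn can be compared with the SGD estimates:

from sklearn.linear_model import LinearRegression

# Closed-form baseline to compare against the SGD slope/intercept
lr_model = LinearRegression().fit(x_train.reshape(-1, 1), y_train)
print('sklearn slope:', lr_model.coef_[0], 'intercept:', lr_model.intercept_)
print('SGD slope:    ', m[0], 'intercept:', c[0])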


Lab-5: Implement linear regression with stochastic mini-batch gradient descent and compare the results with the previous exercise.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import style
style.use('fivethirtyeight')
import warnings
warnings.filterwarnings('ignore')
data = pd.read_csv('/content/salary.csv')
data
x=np.array(data['YearsExperience'])
y=np.array(data['Salary'])
l=len(x)
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,random_state=32)
lx=len(x_train)
class mini_batch_gradient_descent:

    def create_batch(self, x_train, y_train, batch_size):
        # Shuffle the data and cut it into mini-batches of the requested size
        mini_batches = []
        data = np.stack((x_train, y_train), axis=1)
        np.random.shuffle(data)
        no_of_batches = x_train.shape[0] // batch_size
        for i in range(no_of_batches):
            mini_batch = data[i * batch_size:(i + 1) * batch_size]
            mini_batches.append((mini_batch[:, 0], mini_batch[:, 1]))
        if x_train.shape[0] % batch_size != 0:
            mini_batch = data[(i + 1) * batch_size:]
            mini_batches.append((mini_batch[:, 0], mini_batch[:, 1]))
        return mini_batches

    def fit(self, x_train, y_train, alpha, epochs, batch_size):
        self.m = np.random.randn(1, 1)
        self.c = np.random.randn(1, 1)
        l = len(x_train)
        for i in range(epochs):
            batches = self.create_batch(x_train, y_train, batch_size)
            for batch in batches:
                xb = batch[0]
                yb = batch[1]
                xb = xb.reshape(1, xb.shape[0])
                intercept = np.sum((np.dot(self.m, xb) + self.c) - yb)
                slope = np.sum(((np.dot(self.m, xb) + self.c) - yb) * xb)
                self.m = self.m - alpha * (slope / l)
                self.c = self.c - alpha * (intercept / l)

    def slope_intercept(self):
        print(f"slope is {self.m[0][0]}")
        print(f"intercept is {self.c[0][0]}")

    def predict(self, x_test):
        x_test = x_test.reshape(x_test.shape[0], 1)
        self.m = self.m.reshape(self.m.shape[1], self.m.shape[0])
        result = np.dot(x_test, self.m) + self.c
        return result
mgd=mini_batch_gradient_descent()
mgd.fit(x_train,y_train,0.01,4000,4)
mgd.slope_intercept()
y_pred=mgd.predict(x_test)
from sklearn.metrics import mean_squared_error,r2_score,mean_absolute_error
from sklearn.metrics import explained_variance_score
print(mean_squared_error(y_test,y_pred))
print(np.sqrt(mean_squared_error(y_test,y_pred)))
print(mean_absolute_error(y_test,y_pred))
print(explained_variance_score(y_test,y_pred))
print(r2_score(y_test,y_pred))
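For the comparison the exercise asks for, a small hedged helper can print both models' metrics side by side; y_pred_sgd is an assumed name for the Lab-4 predictions on the same test split and does not appear in the listings above.

# Side-by-side comparison of SGD (Lab-4) and mini-batch GD (Lab-5)
def report(name, y_true, y_hat):
    print(f"{name}: RMSE={np.sqrt(mean_squared_error(y_true, y_hat)):.2f}, R2={r2_score(y_true, y_hat):.4f}")

report("mini-batch GD", y_test, y_pred)
# report("SGD", y_test, y_pred_sgd)  # uncomment once the Lab-4 predictions are available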


Lab-6: Optimizing neural networks using L2 regularization, Dropout, data augmentation and
early stopping.

import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
#Augmentation
mnist = tf.keras.datasets.mnist #Get the data
(x_train, y_train),(x_test, y_test) = mnist.load_data()
x_train = tf.keras.utils.normalize(x_train,axis=1)
x_test = tf.keras.utils.normalize(x_test,axis=1)
image_size = x_train.shape[1]
input_size = image_size * image_size
batch_size = 128
hidden_units = 256
epochs = 20
max_batches = len(x_train) / batch_size
from keras.preprocessing.image import ImageDataGenerator
datagen = ImageDataGenerator(zoom_range = 0.2,)
# fit the generator
datagen.fit(x_train.reshape(x_train.shape[0], 28, 28, 1))
# define number of rows & columns
num_row = 2
num_col = 8
num= num_row*num_col
# plot a few images before augmentation
print('BEFORE:\n')
# plot images
fig1, axes1 = plt.subplots(num_row, num_col, figsize=(1.5*num_col, 2*num_row))
for i in range(num):
    ax = axes1[i//num_col, i%num_col]
    ax.imshow(x_train[i], cmap='gray_r')
    ax.set_title('Label: {}'.format(y_train[i]))
plt.tight_layout()
plt.show()
# plot the same images after augmentation
print('AFTER:\n')
fig2, axes2 = plt.subplots(num_row, num_col, figsize=(1.5*num_col, 2*num_row))
for X, Y in datagen.flow(x_train.reshape(x_train.shape[0], 28, 28, 1), y_train.reshape(y_train.shape[0], 1), batch_size=num, shuffle=False):
    for i in range(0, num):
        ax = axes2[i//num_col, i%num_col]
        ax.imshow(X[i].reshape(28, 28), cmap='gray_r')
        ax.set_title('Label: {}'.format(int(Y[i])))
    break
plt.tight_layout()
plt.show()
#Create Neural Network
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(128,activation = tf.nn.relu))
model.add(tf.keras.layers.Dense(128,activation = tf.nn.relu))
model.add(tf.keras.layers.Dense(10,activation = tf.nn.softmax))
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy',metrics=['accuracy'])
# Train on batches drawn from the augmentation generator; the original listing
# trained on the single visualisation batch (X, Y) left over from the plotting loop
model.fit(datagen.flow(x_train.reshape(-1, 28, 28, 1), y_train, batch_size=batch_size), epochs=epochs)
model.summary()
x_test = np.reshape(x_test, [-1, input_size])
scores = model.evaluate(x_test, y_test, batch_size=batch_size, verbose=False)
print('Test loss:', scores[0])
print('Test accuracy: %0.1f%%' % (100 * scores[1]) )
#Show the loss and accuracy.
x_test = np.reshape(x_test, [-1, input_size])
val_loss,val_acc = model.evaluate(x_test,y_test)
print(val_loss,val_acc)
model.save('epic_num_reader_augmentation.model.h5')
new_model = tf.keras.models.load_model('/content/epic_num_reader_augmentation.model.h5')
predictions = new_model.predict([x_test])
print("The number is : ",np.argmax(predictions[2]))

#Show the image (x_test was flattened above, so reshape it back to 28x28)
plt.imshow(x_test[2].reshape(28, 28))
plt.show()

#L2 Regularization, early stopping, drop out
mnist = tf.keras.datasets.mnist #Get the data
(x_train, y_train),(x_test, y_test) = mnist.load_data()
x_train = tf.keras.utils.normalize(x_train,axis=1)
x_test = tf.keras.utils.normalize(x_test,axis=1)
import tensorflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Flatten,MaxPool2D,Dropout
model2 = Sequential([
    Flatten(),
    Dense(128, activation='relu', activity_regularizer=tensorflow.keras.regularizers.L2(0.01)),
    Dropout(0.5),
    Dense(54, activation='relu', activity_regularizer=tensorflow.keras.regularizers.L2(0.01)),
    Dropout(0.5),
    Dense(10, activation='softmax', activity_regularizer=tensorflow.keras.regularizers.L2(0.01))
])
model2.compile(optimizer='adam', loss='sparse_categorical_crossentropy',metrics=['accuracy'])
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=2)
model2.fit(x_train,y_train,epochs=50,callbacks=[callback])
#Show the loss and accuracy.
val_loss, val_acc = model2.evaluate(x_test, y_test)
print(val_loss, val_acc)

model2.save('epic_num_reader_usingalltech.model.h5')
new_model = tf.keras.models.load_model('/content/epic_num_reader_usingalltech.model.h5')
predictions = new_model.predict(x_test)
#Convert the prediction to an understandable form.
print("The number is : ", np.argmax(predictions[11]))
#Change the index above to inspect a different test image and its prediction

#Show the image
plt.imshow(x_test[11])
plt.show()
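A common variation, sketched here under the assumption that model2 and the MNIST arrays above are still in memory: monitor the validation loss instead of the training loss and keep the best weights.

# Early stopping on a validation split (sketch); validation_split=0.1 is an arbitrary choice
callback_val = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)
model2.fit(x_train, y_train, epochs=50, validation_split=0.1, callbacks=[callback_val])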



Lab-7: Implement skip gram model to predict words within a certain range before and after the
current word.

from nltk.corpus import gutenberg # to get bible corpus
from string import punctuation # to remove punctuation from corpus
import nltk
import re
import numpy as np
from keras.preprocessing import text
from keras.preprocessing.sequence import skipgrams
from keras.layers import *
from keras.layers import Dense, Reshape
from keras.layers import Embedding
from keras.models import Model,Sequential
nltk.download('gutenberg')
nltk.download('punkt')
nltk.download('stopwords')
stop_words = nltk.corpus.stopwords.words('english')
bible = gutenberg.sents("bible-kjv.txt")
remove_terms = punctuation + '0123456789'
bible
wpt = nltk.WordPunctTokenizer()
def normalize_document(doc):
    # lower case and remove special characters/extra whitespace
    doc = re.sub(r'[^a-zA-Z\s]', '', doc, flags=re.I | re.A)
    doc = doc.lower()
    doc = doc.strip()
    # tokenize document
    tokens = wpt.tokenize(doc)
    # filter stopwords out of document
    filtered_tokens = [token for token in tokens if token not in stop_words]
    # re-create document from filtered tokens
    doc = ' '.join(filtered_tokens)
    return doc

normalize_corpus = np.vectorize(normalize_document)
norm_bible = [[word.lower() for word in sent if word not in remove_terms] for sent in bible]
norm_bible = [' '.join(tok_sent) for tok_sent in norm_bible]
norm_bible = filter(None, normalize_corpus(norm_bible))
norm_bible = [tok_sent for tok_sent in norm_bible if len(tok_sent.split()) > 2]
tokenizer = text.Tokenizer()
tokenizer.fit_on_texts(norm_bible)

# build word <-> id mappings (needed before the skip-grams can be generated)
word2id = tokenizer.word_index
id2word = {v: k for k, v in word2id.items()}
vocab_size = len(word2id) + 1
wids = [[word2id[w] for w in text.text_to_word_sequence(doc)] for doc in norm_bible]
print('Vocabulary Size:', vocab_size)
print('Vocabulary Sample:', list(word2id.items())[:5])

# generate skip-grams
skip_grams = [skipgrams(wid, vocabulary_size=vocab_size, window_size=10) for wid in wids]

# view sample skip-grams
pairs, labels = skip_grams[0][0], skip_grams[0][1]
for i in range(10):
    print("({:s} ({:d}), {:s} ({:d})) -> {:d}".format(
        id2word[pairs[i][0]], pairs[i][0],
        id2word[pairs[i][1]], pairs[i][1],
        labels[i]))
# build skip-gram architecture
embed_size = 100
word_model = Sequential()
word_model.add(Embedding(vocab_size, embed_size,
                         embeddings_initializer="glorot_uniform",
                         input_length=1))
word_model.add(Reshape((embed_size, )))

context_model = Sequential()
context_model.add(Embedding(vocab_size, embed_size,
                            embeddings_initializer="glorot_uniform",
                            input_length=1))
context_model.add(Reshape((embed_size,)))

merged_output = add([word_model.output, context_model.output])
model_combined = Sequential()
model_combined.add(Dense(1, kernel_initializer="glorot_uniform", activation="sigmoid"))
final_model = Model([word_model.input, context_model.input], model_combined(merged_output))
final_model.compile(loss="mean_squared_error", optimizer="rmsprop")
final_model.summary()
# visualize model structure
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot

SVG(model_to_dot(final_model, show_shapes=True, show_layer_names=False,
rankdir='TB').create(prog='dot', format='svg'))
for epoch in range(1, 3):
    loss = 0
    for i, elem in enumerate(skip_grams):
        pair_first_elem = np.array(list(zip(*elem[0]))[0], dtype='int32')
        pair_second_elem = np.array(list(zip(*elem[0]))[1], dtype='int32')
        labels = np.array(elem[1], dtype='int32')
        X = [pair_first_elem, pair_second_elem]
        Y = labels
        if i % 10000 == 0:
            print('Processed {} (skip_first, skip_second, relevance) pairs'.format(i))
        loss += final_model.train_on_batch(X, Y)

    print('Epoch:', epoch, 'Loss:', loss)
from sklearn.metrics.pairwise import euclidean_distances
word_embed_layer = word_model.layers[0]
weights = word_embed_layer.get_weights()[0][1:]

distance_matrix = euclidean_distances(weights)
print(distance_matrix.shape)

similar_words = {search_term: [id2word[idx] for idx in distance_matrix[word2id[search_term]-1].argsort()[1:6]+1]
for search_term in ['god', 'jesus','egypt', 'john', 'famine']}

similar_words
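As a hedged alternative to the Euclidean-distance lookup above, cosine similarity over the same embedding matrix gives comparable neighbours; the query word here is an arbitrary example.

from sklearn.metrics.pairwise import cosine_similarity

# Cosine-similarity neighbours for one query word, using the trained embedding weights
query = 'god'
sims = cosine_similarity(weights[word2id[query] - 1].reshape(1, -1), weights)[0]
top = sims.argsort()[::-1][1:6]
print([id2word[idx + 1] for idx in top])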



Lab-8: Implement LeNet for image classification

import tensorflow as tf
from tensorflow import keras
import numpy as np
(train_x, train_y), (test_x, test_y) = keras.datasets.mnist.load_data()
train_x = train_x / 255.0
test_x = test_x / 255.0
train_x = tf.expand_dims(train_x, 3)
test_x = tf.expand_dims(test_x, 3)
val_x = train_x[:5000]
val_y = train_y[:5000]
train_x[0].shape
lenet_5_model = keras.models.Sequential([
    keras.layers.Conv2D(6, kernel_size=5, strides=1, activation='tanh', input_shape=train_x[0].shape, padding='same'),  # C1
    keras.layers.AveragePooling2D(),  # S2
    keras.layers.Conv2D(16, kernel_size=5, strides=1, activation='tanh', padding='valid'),  # C3
    keras.layers.AveragePooling2D(),  # S4
    keras.layers.Conv2D(120, kernel_size=5, strides=1, activation='tanh', padding='valid'),  # C5
    keras.layers.Flatten(),
    keras.layers.Dense(84, activation='tanh'),  # F6
    keras.layers.Dense(10, activation='softmax')  # Output layer
])
lenet_5_model.summary()
lenet_5_model.compile(optimizer='adam',loss=keras.losses.sparse_categorical_crossentropy, metrics=['accuracy'])
lenet_5_model.fit(train_x, train_y, epochs=5, validation_data=(val_x, val_y))
lenet_5_model.evaluate(test_x, test_y)
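A brief hedged follow-up showing a single prediction; matplotlib is imported here because the listing above does not use it.

import matplotlib.pyplot as plt

# Predict the class of one test image and display it
probs = lenet_5_model.predict(test_x[:1])
print('predicted digit:', np.argmax(probs[0]), '| true digit:', test_y[0])
plt.imshow(tf.squeeze(test_x[0]), cmap='gray')
plt.show()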



Lab-9: Implement ResNet for detecting objects.

import tensorflow as tf
from tensorflow import keras
import numpy as np
from tensorflow.keras.applications import ResNet50
from keras.utils import to_categorical
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
model_resnet_50 = Sequential()
model_resnet_50.add(ResNet50(include_top=False,weights="imagenet",input_tensor=None,input_shape=(32,32,3),pooling='avg',classes=10))
model_resnet_50.add(Flatten())
model_resnet_50.add(Dense(1024, activation='relu'))
model_resnet_50.add(Dense(512, activation='relu'))
model_resnet_50.add(Dense(10, activation='softmax'))
model_resnet_50.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
model_resnet_50.summary()
model_resnet_50.fit(x_train, y_train, batch_size=128, epochs=20, verbose=1, validation_data=(x_test, y_test))
loss, accuracy = model_resnet_50.evaluate(x_test, y_test, batch_size=64)
# Predicted class indices for the test images
Predicted = np.argmax(model_resnet_50.predict(x_test), axis=1)
Predicted
for i in range(len(y_test)):
    print("Actual = %s, Predicted = %s" % (np.argmax(y_test[i]), Predicted[i]))



Lab-10: Transfer learning implementation using VGG16 model to classify images.

import tensorflow_datasets as tfds
from tensorflow.keras.utils import to_categorical
## Loading images and labels (70% of the split for training, the remaining 30% for testing)
(train_ds, train_labels), (test_ds, test_labels) = tfds.load("tf_flowers", split=["train[:70%]", "train[70%:]"], batch_size=-1, as_supervised=True)
print(train_labels)
import tensorflow as tf
train_ds = tf.image.resize(train_ds, (150, 150))
test_ds = tf.image.resize(test_ds, (150, 150))
train_ds[0].shape
## Transforming labels to correct format
train_labels = to_categorical(train_labels, num_classes=5)
test_labels = to_categorical(test_labels, num_classes=5)
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input

## Loading VGG16 model
base_model = VGG16(weights="imagenet", include_top=False, input_shape=train_ds[0].shape)
base_model.trainable = False
## Preprocessing input
train_ds = preprocess_input(train_ds)
test_ds = preprocess_input(test_ds)
base_model.summary()
from tensorflow.keras import layers, models
model=models.Sequential()
model.add(base_model)
model.add(layers.Flatten())
model.add(layers.Dense(50,activation='relu'))
model.add(layers.Dense(20,activation='relu'))
model.add(layers.Dense(5,activation='softmax'))
model.summary()
from tensorflow.keras.callbacks import EarlyStopping
model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'],)
es = EarlyStopping(monitor='val_accuracy', mode='max', patience=5, restore_best_weights=True)
model.fit(train_ds, train_labels, epochs=50, validation_split=0.2, batch_size=32, callbacks=[es])
val_loss,val_acc = model.evaluate(test_ds,test_labels)
print(val_loss,val_acc)
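To map one prediction back to a flower name, a hedged sketch: the class names are read from the tf_flowers dataset metadata, and numpy is imported here because the listing above does not import it.

import numpy as np

# Human-readable class names from the dataset metadata
class_names = tfds.builder("tf_flowers").info.features["label"].names

pred = model.predict(test_ds[:1])
print('predicted:', class_names[int(np.argmax(pred[0]))],
      '| actual:', class_names[int(np.argmax(test_labels[0]))])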



Lab-11: Building an RNN to perform character-level language modeling.

import numpy as np
import matplotlib.pyplot as plt

def initialize_parameters(vocab_size, hidden_layer_size):

    parameters = {}
    parameters["Whh"] = np.random.randn(
        hidden_layer_size, hidden_layer_size) * 0.01
    parameters["Wxh"] = np.random.randn(hidden_layer_size, vocab_size) * 0.01
    parameters["b"] = np.zeros((hidden_layer_size, 1))
    parameters["Why"] = np.random.randn(vocab_size, hidden_layer_size) * 0.01
    parameters["c"] = np.zeros((vocab_size, 1))

    return parameters


def initialize_adam(parameters):

    parameters_names = ["Whh", "Wxh", "b", "Why", "c"]
    v = {}
    s = {}

    for param_name in parameters_names:
        v["d" + param_name] = np.zeros_like(parameters[param_name])
        s["d" + param_name] = np.zeros_like(parameters[param_name])

    return v, s


def initialize_rmsprop(parameters):

    parameters_names = ["Whh", "Wxh", "b", "Why", "c"]
    s = {}

    for param_name in parameters_names:
        s["d" + param_name] = np.zeros_like(parameters[param_name])

    return s


def softmax(z):

    e_z = np.exp(z)
    probs = e_z / np.sum(e_z)

    return probs


def rnn_forward(x, y, h_prev, parameters):

    # Retrieve parameters
    Wxh, Whh, b = parameters["Wxh"], parameters["Whh"], parameters["b"]
    Why, c = parameters["Why"], parameters["c"]

    # Initialize inputs, hidden state, output, and probabilities dictionaries
    xs, hs, os, probs = {}, {}, {}, {}

    # Initialize x0 to zero vector
    xs[0] = np.zeros((vocab_size, 1))

    # Initialize loss and assign h_prev to last hidden state in hs
    loss = 0
    hs[-1] = np.copy(h_prev)

    # Forward pass: loop over all characters of the name
    for t in range(len(x)):
        # Convert to one-hot vector
        if t > 0:
            xs[t] = np.zeros((vocab_size, 1))
            xs[t][x[t]] = 1
        # Hidden state
        hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t - 1]) + b)
        # Logits
        os[t] = np.dot(Why, hs[t]) + c
        # Probs
        probs[t] = softmax(os[t])
        # Loss
        loss -= np.log(probs[t][y[t], 0])

    cache = (xs, hs, probs)

    return loss, cache


def smooth_loss(loss, current_loss):

    return 0.999 * loss + 0.001 * current_loss


def clip_gradients(gradients, max_value):

    for grad in gradients.keys():
        np.clip(gradients[grad], -max_value, max_value, out=gradients[grad])

    return gradients


def rnn_backward(y, parameters, cache):

    # Retrieve xs, hs, and probs
    xs, hs, probs = cache

    # Initialize all gradients to zero
    dh_next = np.zeros_like(hs[0])

    parameters_names = ["Whh", "Wxh", "b", "Why", "c"]
    grads = {}
    for param_name in parameters_names:
        grads["d" + param_name] = np.zeros_like(parameters[param_name])

    # Iterate over all time steps in reverse order starting from Tx
    for t in reversed(range(len(xs))):
        dy = np.copy(probs[t])
        dy[y[t]] -= 1
        grads["dWhy"] += np.dot(dy, hs[t].T)
        grads["dc"] += dy
        dh = np.dot(parameters["Why"].T, dy) + dh_next
        dhraw = (1 - hs[t] ** 2) * dh
        grads["dWhh"] += np.dot(dhraw, hs[t - 1].T)
        grads["dWxh"] += np.dot(dhraw, xs[t].T)
        grads["db"] += dhraw
        dh_next = np.dot(parameters["Whh"].T, dhraw)
    # Clip the gradients using [-5, 5] as the interval
    grads = clip_gradients(grads, 5)
    # Get the last hidden state
    h_prev = hs[len(xs) - 1]

    return grads, h_prev


def update_parameters_with_adam(
        parameters, grads, v, s, t, learning_rate, beta1=0.9, beta2=0.999,
        epsilon=1e-8):

    parameters_names = ["Whh", "Wxh", "b", "Why", "c"]
    v_corrected = {}
    s_corrected = {}

    for param_name in parameters_names:
        # Update the moving averages of the gradient and the squared gradient
        v["d" + param_name] = (beta1 * v["d" + param_name]
                               + (1 - beta1) * grads["d" + param_name])
        s["d" + param_name] = (beta2 * s["d" + param_name]
                               + (1 - beta2) * np.square(grads["d" + param_name]))

        # Compute the bias-corrected estimates of the moving averages
        v_corrected["d" + param_name] = v["d" + param_name] / (1 - beta1**t)
        s_corrected["d" + param_name] = s["d" + param_name] / (1 - beta2**t)

        # Update parameters
        parameters[param_name] -= (learning_rate * v_corrected["d" + param_name]
                                   / np.sqrt(s_corrected["d" + param_name] + epsilon))

    return parameters, v, s


def update_parameters(parameters, grads, learning_rate):
    for param in parameters.keys():
        parameters[param] -= learning_rate * grads["d" + param]

    return parameters


def update_parameters_with_rmsprop(
        parameters, grads, s, beta=0.9, learning_rate=0.001, epsilon=1e-8):

    parameters_names = ["Whh", "Wxh", "b", "Why", "c"]

    for param_name in parameters_names:
        # Update the exponentially weighted average of squared gradients
        s["d" + param_name] = (beta * s["d" + param_name]
                               + (1 - beta) * np.square(grads["d" + param_name]))

        # Update parameters
        parameters[param_name] -= (learning_rate * grads["d" + param_name]
                                   / np.sqrt(s["d" + param_name] + epsilon))

    return parameters, s


def sample(parameters, idx_to_chars, chars_to_idx, n):

    # Retrieve parameters, shapes, and vocab size
    Whh, Wxh, b = parameters["Whh"], parameters["Wxh"], parameters["b"]
    Why, c = parameters["Why"], parameters["c"]
    n_h, n_x = Wxh.shape
    vocab_size = c.shape[0]

    # Initialize h0 and x1 to zero vectors
    h_prev = np.zeros((n_h, 1))
    x = np.zeros((n_x, 1))

    # Initialize empty sequence
    indices = []
    idx = -1
    counter = 0
    while (counter <= n and idx != chars_to_idx["\n"]):
        # Forward propagation
        h = np.tanh(np.dot(Whh, h_prev) + np.dot(Wxh, x) + b)
        o = np.dot(Why, h) + c
        probs = softmax(o)

        # Sample the index of the character using the generated probability distribution
        idx = np.random.choice(vocab_size, p=probs.ravel())

        # Get the character of the sampled index
        char = idx_to_chars[idx]

        # Add the char to the sequence
        indices.append(idx)

        # Update h_prev and x
        h_prev = np.copy(h)
        x = np.zeros((n_x, 1))
        x[idx] = 1

        counter += 1
    sequence = "".join([idx_to_chars[idx] for idx in indices if idx != 0])

    return sequence


def model(
        file_path, chars_to_idx, idx_to_chars, hidden_layer_size, vocab_size,
        num_epochs=10, learning_rate=0.01):

    # Get the data
    with open(file_path) as f:
        data = f.readlines()
    examples = [x.lower().strip() for x in data]

    # Initialize parameters
    parameters = initialize_parameters(vocab_size, hidden_layer_size)

    # Initialize RMSprop state
    s = initialize_rmsprop(parameters)

    # Initialize loss
    smoothed_loss = -np.log(1 / vocab_size) * 7

    # Initialize hidden state h0 and overall loss
    h_prev = np.zeros((hidden_layer_size, 1))
    overall_loss = []

    # Iterate over number of epochs
    for epoch in range(num_epochs):
        print(f"\033[1m\033[94mEpoch {epoch}")
        print(f"\033[1m\033[92m=======")

        # Sample one name
        print(f"Sampled name: {sample(parameters, idx_to_chars, chars_to_idx, 10).capitalize()}")
        print(f"Smoothed loss: {smoothed_loss:.4f}\n")

        # Shuffle examples
        np.random.shuffle(examples)

        # Iterate over all examples (SGD)
        for example in examples:
            x = [None] + [chars_to_idx[char] for char in example]
            y = x[1:] + [chars_to_idx["\n"]]
            # Forward pass
            loss, cache = rnn_forward(x, y, h_prev, parameters)
            # Compute smoothed loss
            smoothed_loss = smooth_loss(smoothed_loss, loss)
            # Backward pass
            grads, h_prev = rnn_backward(y, parameters, cache)
            # Update parameters
            parameters, s = update_parameters_with_rmsprop(
                parameters, grads, s)

        overall_loss.append(smoothed_loss)

    return parameters, overall_loss
# Load names
data = open("rnn.txt", "r").read()

# Convert characters to lower case
data = data.lower()

# Construct vocabulary using unique characters, sort it in ascending order,
# then construct two dictionaries that maps character to index and index to
# characters.
chars = list(sorted(set(data)))
chars_to_idx = {ch:i for i, ch in enumerate(chars)}
idx_to_chars = {i:ch for ch, i in chars_to_idx.items()}

# Get the size of the data and vocab size
data_size = len(data)
vocab_size = len(chars_to_idx)
print(f"There are {data_size} characters and {vocab_size} unique characters.")

# Fitting the model
parameters, loss = model("/content/rnn.txt", chars_to_idx, idx_to_chars, 100, vocab_size, 10, 0.01)

# Plotting the loss
plt.plot(range(len(loss)), loss)
plt.xlabel("Epochs")
plt.ylabel("Smoothed loss");

