Lab 9: Implement a skip-gram model to predict words within a certain range before and after the current word.

from nltk.corpus import gutenberg # to get bible corpus
from string import punctuation # to remove punctuation from corpus
import nltk
import re
import numpy as np
from keras.preprocessing import text
from keras.preprocessing.sequence import skipgrams
from keras.layers import Dense, Reshape, Embedding, add
from keras.models import Model, Sequential
nltk.download('gutenberg')
nltk.download('punkt')
nltk.download('stopwords')
stop_words = nltk.corpus.stopwords.words('english')
bible = gutenberg.sents("bible-kjv.txt")
remove_terms = punctuation + '0123456789'
bible
wpt = nltk.WordPunctTokenizer()
def normalize_document(doc):
    # lower case and remove special characters/extra whitespace
    doc = re.sub(r'[^a-zA-Z\s]', '', doc, flags=re.I | re.A)
    doc = doc.lower()
    doc = doc.strip()
    # tokenize document
    tokens = wpt.tokenize(doc)
    # filter stopwords out of document
    filtered_tokens = [token for token in tokens if token not in stop_words]
    # re-create document from filtered tokens
    doc = ' '.join(filtered_tokens)
    return doc

normalize_corpus = np.vectorize(normalize_document)
norm_bible = [[word.lower() for word in sent if word not in remove_terms] for sent in bible]
norm_bible = [' '.join(tok_sent) for tok_sent in norm_bible]
norm_bible = filter(None, normalize_corpus(norm_bible))
norm_bible = [tok_sent for tok_sent in norm_bible if len(tok_sent.split()) > 2]
tokenizer = text.Tokenizer()
tokenizer.fit_on_texts(norm_bible)
word2id = tokenizer.word_index
id2word = {v: k for k, v in word2id.items()}
vocab_size = len(word2id) + 1
wids = [[word2id[w] for w in text.text_to_word_sequence(doc)] for doc in norm_bible]
print('Vocabulary Size:', vocab_size)
print('Vocabulary Sample:', list(word2id.items())[:5])

# generate skip-grams
skip_grams = [skipgrams(wid, vocabulary_size=vocab_size, window_size=10) for wid in wids]

# view sample skip-grams
pairs, labels = skip_grams[0][0], skip_grams[0][1]
for i in range(10):
    print("({:s} ({:d}), {:s} ({:d})) -> {:d}".format(
        id2word[pairs[i][0]], pairs[i][0],
        id2word[pairs[i][1]], pairs[i][1],
        labels[i]))
# build skip-gram architecture
embed_size = 100
word_model = Sequential()
word_model.add(Embedding(vocab_size, embed_size,
                         embeddings_initializer="glorot_uniform",
                         input_length=1))
word_model.add(Reshape((embed_size,)))

context_model = Sequential()
context_model.add(Embedding(vocab_size, embed_size,
                            embeddings_initializer="glorot_uniform",
                            input_length=1))
context_model.add(Reshape((embed_size,)))

merged_output = add([word_model.output, context_model.output])
model_combined = Sequential()
model_combined.add(Dense(1, kernel_initializer="glorot_uniform", activation="sigmoid"))
final_model = Model([word_model.input, context_model.input], model_combined(merged_output))
final_model.compile(loss="mean_squared_error", optimizer="rmsprop")
final_model.summary()

# visualize model structure
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot

SVG(model_to_dot(final_model, show_shapes=True, show_layer_names=False,
                 rankdir='TB').create(prog='dot', format='svg'))
for epoch in range(1, 3):
    loss = 0
    for i, elem in enumerate(skip_grams):
        pair_first_elem = np.array(list(zip(*elem[0]))[0], dtype='int32')
        pair_second_elem = np.array(list(zip(*elem[0]))[1], dtype='int32')
        labels = np.array(elem[1], dtype='int32')
        X = [pair_first_elem, pair_second_elem]
        Y = labels
        if i % 10000 == 0:
            print('Processed {} (skip_first, skip_second, relevance) pairs'.format(i))
        loss += final_model.train_on_batch(X, Y)

    print('Epoch:', epoch, 'Loss:', loss)
from sklearn.metrics.pairwise import euclidean_distances
word_embed_layer = word_model.layers[0]
weights = word_embed_layer.get_weights()[0][1:]

distance_matrix = euclidean_distances(weights)
print(distance_matrix.shape)

similar_words = {search_term: [id2word[idx] for idx in distance_matrix[word2id[search_term] - 1].argsort()[1:6] + 1]
                 for search_term in ['god', 'jesus', 'egypt', 'john', 'famine']}

similar_words
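
As an optional sanity check that is not part of the original listing, the learned embeddings can be projected to two dimensions with t-SNE and plotted. The sketch below assumes the weights, word2id, and similar_words objects defined above; which words to plot (and the perplexity value) are arbitrary illustrative choices.

# Optional sketch (assumption: reuses weights / word2id / similar_words from above)
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

# plot the search terms plus their nearest neighbours (illustrative choice of words)
words_to_plot = sum(similar_words.values(), list(similar_words.keys()))
word_ids = [word2id[w] for w in words_to_plot]
vectors = weights[np.array(word_ids) - 1]  # weights row i corresponds to word id i + 1

tsne = TSNE(n_components=2, perplexity=5, init='random', random_state=0)
points = tsne.fit_transform(vectors)

plt.figure(figsize=(8, 6))
plt.scatter(points[:, 0], points[:, 1], c='steelblue')
for (px, py), word in zip(points, words_to_plot):
    plt.annotate(word, (px, py), xytext=(4, 2), textcoords='offset points')
plt.show()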

Lab 11: Building an RNN to perform character-level language modeling.

import numpy as np
import matplotlib.pyplot as plt

def initialize_parameters(vocab_size, hidden_layer_size):
    parameters = {}
    parameters["Whh"] = np.random.randn(
        hidden_layer_size, hidden_layer_size) * 0.01
    parameters["Wxh"] = np.random.randn(hidden_layer_size, vocab_size) * 0.01
    parameters["b"] = np.zeros((hidden_layer_size, 1))
    parameters["Why"] = np.random.randn(vocab_size, hidden_layer_size) * 0.01
    parameters["c"] = np.zeros((vocab_size, 1))

    return parameters


def initialize_adam(parameters):
    parameters_names = ["Whh", "Wxh", "b", "Why", "c"]
    v = {}
    s = {}

    for param_name in parameters_names:
        v["d" + param_name] = np.zeros_like(parameters[param_name])
        s["d" + param_name] = np.zeros_like(parameters[param_name])

    return v, s


def initialize_rmsprop(parameters):
    parameters_names = ["Whh", "Wxh", "b", "Why", "c"]
    s = {}

    for param_name in parameters_names:
        s["d" + param_name] = np.zeros_like(parameters[param_name])

    return s


def softmax(z):
    # subtract the max logit for numerical stability
    e_z = np.exp(z - np.max(z))
    probs = e_z / np.sum(e_z)

    return probs


def rnn_forward(x, y, h_prev, parameters):
    # Retrieve parameters
    Wxh, Whh, b = parameters["Wxh"], parameters["Whh"], parameters["b"]
    Why, c = parameters["Why"], parameters["c"]

    # Initialize inputs, hidden state, output, and probabilities dictionaries
    xs, hs, os, probs = {}, {}, {}, {}

    # Initialize x0 to zero vector
    xs[0] = np.zeros((vocab_size, 1))

    # Initialize loss and assign h_prev to last hidden state in hs
    loss = 0
    hs[-1] = np.copy(h_prev)

    # Forward pass: loop over all characters of the name
    for t in range(len(x)):
        # Convert to one-hot vector
        if t > 0:
            xs[t] = np.zeros((vocab_size, 1))
            xs[t][x[t]] = 1
        # Hidden state
        hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t - 1]) + b)
        # Logits
        os[t] = np.dot(Why, hs[t]) + c
        # Probs
        probs[t] = softmax(os[t])
        # Loss
        loss -= np.log(probs[t][y[t], 0])

    cache = (xs, hs, probs)

    return loss, cache


def smooth_loss(loss, current_loss):
    return 0.999 * loss + 0.001 * current_loss


def clip_gradients(gradients, max_value):
    for grad in gradients.keys():
        np.clip(gradients[grad], -max_value, max_value, out=gradients[grad])

    return gradients


def rnn_backward(y, parameters, cache):
    # Retrieve xs, hs, and probs
    xs, hs, probs = cache

    # Initialize all gradients to zero
    dh_next = np.zeros_like(hs[0])

    parameters_names = ["Whh", "Wxh", "b", "Why", "c"]
    grads = {}
    for param_name in parameters_names:
        grads["d" + param_name] = np.zeros_like(parameters[param_name])

    # Iterate over all time steps in reverse order starting from Tx
    for t in reversed(range(len(xs))):
        dy = np.copy(probs[t])
        dy[y[t]] -= 1
        grads["dWhy"] += np.dot(dy, hs[t].T)
        grads["dc"] += dy
        dh = np.dot(parameters["Why"].T, dy) + dh_next
        dhraw = (1 - hs[t] ** 2) * dh
        grads["dWhh"] += np.dot(dhraw, hs[t - 1].T)
        grads["dWxh"] += np.dot(dhraw, xs[t].T)
        grads["db"] += dhraw
        dh_next = np.dot(parameters["Whh"].T, dhraw)
    # Clip the gradients using [-5, 5] as the interval
    grads = clip_gradients(grads, 5)
    # Get the last hidden state
    h_prev = hs[len(xs) - 1]

    return grads, h_prev


def update_parameters_with_adam(
        parameters, grads, v, s, t, learning_rate, beta1=0.9, beta2=0.999,
        epsilon=1e-8):
    parameters_names = ["Whh", "Wxh", "b", "Why", "c"]
    v_corrected = {}
    s_corrected = {}

    for param_name in parameters_names:
        # Update the moving averages of the gradient and the squared gradient
        v["d" + param_name] = (beta1 * v["d" + param_name] +
                               (1 - beta1) * grads["d" + param_name])
        s["d" + param_name] = (beta2 * s["d" + param_name] +
                               (1 - beta2) * np.square(grads["d" + param_name]))

        # Compute the bias-corrected estimates of the moving averages
        v_corrected["d" + param_name] = v["d" + param_name] / (1 - beta1**t)
        s_corrected["d" + param_name] = s["d" + param_name] / (1 - beta2**t)

        # Update parameters
        parameters[param_name] -= (learning_rate * v_corrected["d" + param_name] /
                                   np.sqrt(s_corrected["d" + param_name] + epsilon))

    return parameters, v, s


def update_parameters(parameters, grads, learning_rate):
    for param in parameters.keys():
        parameters[param] -= learning_rate * grads["d" + param]

    return parameters


def update_parameters_with_rmsprop(
        parameters, grads, s, beta=0.9, learning_rate=0.001, epsilon=1e-8):
    parameters_names = ["Whh", "Wxh", "b", "Why", "c"]

    for param_name in parameters_names:
        # Update the exponentially weighted average of squared gradients
        s["d" + param_name] = (beta * s["d" + param_name] +
                               (1 - beta) * np.square(grads["d" + param_name]))

        # Update parameters
        parameters[param_name] -= (learning_rate * grads["d" + param_name] /
                                   np.sqrt(s["d" + param_name] + epsilon))

    return parameters, s


def sample(parameters, idx_to_chars, chars_to_idx, n):
    # Retrieve parameters, shapes, and vocab size
    Whh, Wxh, b = parameters["Whh"], parameters["Wxh"], parameters["b"]
    Why, c = parameters["Why"], parameters["c"]
    n_h, n_x = Wxh.shape
    vocab_size = c.shape[0]

    # Initialize the hidden state and the input to zero vectors
    h_prev = np.zeros((n_h, 1))
    x = np.zeros((n_x, 1))

    # Initialize empty sequence
    indices = []
    idx = -1
    counter = 0
    while (counter <= n and idx != chars_to_idx["\n"]):
        # Fwd propagation
        h = np.tanh(np.dot(Whh, h_prev) + np.dot(Wxh, x) + b)
        o = np.dot(Why, h) + c
        probs = softmax(o)

        # Sample the index of the character using the predicted distribution
        idx = np.random.choice(vocab_size, p=probs.ravel())

        # Add the sampled index to the sequence
        indices.append(idx)

        # Update h_prev and x
        h_prev = np.copy(h)
        x = np.zeros((n_x, 1))
        x[idx] = 1

        counter += 1
    sequence = "".join([idx_to_chars[idx] for idx in indices if idx != 0])

    return sequence


def model(
        file_path, chars_to_idx, idx_to_chars, hidden_layer_size, vocab_size,
        num_epochs=10, learning_rate=0.01):
    # Get the data
    with open(file_path) as f:
        data = f.readlines()
    examples = [x.lower().strip() for x in data]

    # Initialize parameters
    parameters = initialize_parameters(vocab_size, hidden_layer_size)

    # Initialize RMSprop state
    s = initialize_rmsprop(parameters)

    # Initialize loss
    smoothed_loss = -np.log(1 / vocab_size) * 7

    # Initialize hidden state h0 and overall loss
    h_prev = np.zeros((hidden_layer_size, 1))
    overall_loss = []

    # Iterate over number of epochs
    for epoch in range(num_epochs):
        print(f"\033[1m\033[94mEpoch {epoch}")
        print(f"\033[1m\033[92m=======")

        # Sample one name
        print(f"Sampled name: {sample(parameters, idx_to_chars, chars_to_idx, 10).capitalize()}")
        print(f"Smoothed loss: {smoothed_loss:.4f}\n")

        # Shuffle examples
        np.random.shuffle(examples)

        # Iterate over all examples (SGD)
        for example in examples:
            x = [None] + [chars_to_idx[char] for char in example]
            y = x[1:] + [chars_to_idx["\n"]]
            # Fwd pass
            loss, cache = rnn_forward(x, y, h_prev, parameters)
            # Compute smooth loss
            smoothed_loss = smooth_loss(smoothed_loss, loss)
            # Bwd pass
            grads, h_prev = rnn_backward(y, parameters, cache)
            # Update parameters
            parameters, s = update_parameters_with_rmsprop(
                parameters, grads, s)

        overall_loss.append(smoothed_loss)

    return parameters, overall_loss

# Load names
data = open("rnn.txt", "r").read()

# Convert characters to lower case
data = data.lower()

# Construct the vocabulary from the unique characters, sort it in ascending order,
# then build two dictionaries that map characters to indices and indices to
# characters.
chars = list(sorted(set(data)))
chars_to_idx = {ch:i for i, ch in enumerate(chars)}
idx_to_chars = {i:ch for ch, i in chars_to_idx.items()}

# Get the size of the data and vocab size
data_size = len(data)
vocab_size = len(chars_to_idx)
print(f"There are {data_size} characters and {vocab_size} unique characters.")

# Fitting the model
parameters, loss = model("/content/rnn.txt", chars_to_idx, idx_to_chars, 100, vocab_size, 10, 0.01)

# Plotting the loss
plt.plot(range(len(loss)), loss)
plt.xlabel("Epochs")
plt.ylabel("Smoothed loss");
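
After training, the sample function defined above can be reused to draw a few more names from the fitted parameters. The snippet below is illustrative; the number of samples and the maximum length are arbitrary choices, not values from the original note.

# Illustrative: draw a few extra samples from the trained parameters
# (5 samples of at most 15 characters each are arbitrary choices)
for _ in range(5):
    print(sample(parameters, idx_to_chars, chars_to_idx, 15).capitalize())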


# Fragment: fits an already-compiled Keras model (assumed here to be bound to the
# name `model`, with training arrays X and y prepared beforehand) for a few epochs
# and records the training loss after each epoch.
loss = list()
for i in range(5):
    # fit model for one epoch on this data
    hist = model.fit(X, y, batch_size=1000, verbose=1, epochs=1, validation_split=0.2)
    loss.append(hist.history['loss'][0])

Lab 12: Build an LSTM network for Named Entity Recognition.
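
The note ends before the listing for this lab. As a hedged, minimal sketch of one common approach (not the lab's own code), a bidirectional LSTM tagger can be set up in Keras as below. The values of max_len, num_words, and num_tags, the hyperparameters, and the assumption that X is a padded array of word ids of shape (num_sentences, max_len) and y is one-hot tag labels of shape (num_sentences, max_len, num_tags) are all illustrative.

# Hedged sketch of a BiLSTM tagger for NER in Keras.
# Assumptions (not from the original note): X holds padded word-id sequences,
# y holds one-hot tag labels, and the sizes below are placeholders.
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Bidirectional, TimeDistributed, Dense

max_len = 75        # assumed maximum sentence length after padding
num_words = 20000   # assumed vocabulary size (index 0 reserved for padding)
num_tags = 17       # assumed number of NER tags (e.g. a BIO scheme)

ner_model = Sequential()
# Map each word id to a dense vector; mask_zero makes the model ignore padding
ner_model.add(Embedding(input_dim=num_words, output_dim=64,
                        input_length=max_len, mask_zero=True))
# Bidirectional LSTM returns one hidden vector per time step
ner_model.add(Bidirectional(LSTM(units=64, return_sequences=True,
                                 recurrent_dropout=0.1)))
# Predict a tag distribution at every time step
ner_model.add(TimeDistributed(Dense(num_tags, activation="softmax")))

ner_model.compile(optimizer="adam", loss="categorical_crossentropy",
                  metrics=["accuracy"])
ner_model.summary()

# Training would then look like:
# history = ner_model.fit(X, y, batch_size=32, epochs=5, validation_split=0.1)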

     
 