The Algorithms logo
The Algorithms
AboutDonate

02-Imdb-Binary-Classification

H
#Imports
from keras.datasets import imdb

from keras import models
from keras import layers
from keras import optimizers
from keras import losses
from keras import metrics,activations

import matplotlib.pyplot as plt
C:\Users\Hussnain\Anaconda3\envs\tensorflow\lib\site-packages\h5py\__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
#Downloading data from https://s3.amazonaws.com/text-datasets/imdb.npz

(xtrain,ytrain), (xtest, ytest) = imdb.load_data(num_words=10000)
Downloading data from https://s3.amazonaws.com/text-datasets/imdb.npz
 1048576/17464789 [>.............................] - ETA: 53:49
#Exploring the dataset

print('xtrain shape', xtrain.shape)
print('ytrain shape', ytrain.shape)
print()
print('xtest shape', xtest.shape)
print('ytest shape', ytest.shape)
print()
print('xtrain first review as dictionary index', xtrain[1])
print()
print()
print('ytrain label', ytrain[0])
#index to words mapping
word_index = imdb.get_word_index()
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
decode_review = ' '.join([reverse_word_index.get(i-3, reverse_word_index.get(i)) for i in xtrain[22]])
decode_review
import numpy as np

def vectorize_sequences(sequences, dimension=10000):
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1. 
    return results

x_train = vectorize_sequences(xtrain)
x_test = vectorize_sequences(xtest)
ytrain = np.asarray(ytrain).astype('float32')
ytest = np.asarray(ytest).astype('float32')
#model
model = models.Sequential()
model.add(layers.Dense(16, activation=activations.relu, input_shape=(10000,)))
model.add(layers.Dense(16, activation=activations.relu))
model.add(layers.Dense(1, activation=activations.sigmoid))
model.compile(optimizer=optimizers.RMSprop(lr=0.0001), loss=losses.mse, metrics=['acc'])
x_val = x_train[:10000]
y_val = ytrain[:10000]

x_train_partial = x_train[10000:]
y_train_partial = ytrain[10000:]
history = model.fit(x_train_partial, y_train_partial, epochs=4, batch_size=512, validation_data=(x_val,y_val))
history_dict = history.history
history_dict.keys()
print(history.history['acc'][-1])
print(history.history['val_acc'][-1])
print(model.predict(x_train_partial[22:23]))
loss = history_dict['loss']
val_loss = history_dict['val_loss']
epochs = range(0, len(loss)+1)
epochs
%matplotlib
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(1, len(acc) + 1)

# "bo" is for "blue dot"
plt.plot(epochs, loss, 'ro', label='Training loss')
# b is for "solid blue line"
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.show()
plt.clf()      # clear figure# clear  
acc_values = history_dict['acc']
val_acc_values = history_dict['val_acc']

plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.show()