本案例使用 1,024張『貓』、和 1,024張『狗』的照片來訓練一個可以辨識貓或狗的模型, 利用『轉換學習(Transfer Learning)』的方法,我們可以在很短的時間就做到92.5%的正確率。
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from scipy import ndimage, misc
# Preview the first 16 cat and 16 dog training images on an m x n grid.
#
# scipy.ndimage.imread and scipy.misc.imresize have been removed from SciPy,
# so the thumbnails are loaded and resized with Keras' own image loader
# instead. The scipy import at the top of the file is no longer needed by
# this cell.
from keras.preprocessing.image import load_img

n = 8  # grid columns
m = 4  # grid rows
img_file = ["data/train/cats/cat.{}.jpg".format(i) for i in range(16)] + [
    "data/train/dogs/dog.{}.jpg".format(i) for i in range(16)]

plt.figure(figsize=(20, 12))
for i in range(m):
    for j in range(n):
        # Subplot indices are 1-based and run row by row.
        ax = plt.subplot(m, n, i*n + j + 1)
        # load_img converts to RGB and resizes to an 80x80 thumbnail.
        img = load_img(img_file[i*n + j], target_size=(80, 80))
        plt.imshow(img)
        # Hide the tick marks/labels; only the picture matters here.
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
plt.show()
## This notebook is built around using tensorflow as the backend for keras
##Updated to Keras 2.0
from keras import backend
import os
import numpy as np
from keras.models import Sequential
from keras.layers import Activation, Dropout, Flatten, Dense
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras import optimizers
from keras import applications
from keras.models import Model
# from PIL import Image
# import h5py
# Input geometry and dataset locations shared by the rest of the script.
img_width, img_height = 150, 150
train_data_dir = 'data/train'
validation_data_dir = 'data/validation'

# The only preprocessing step: scale pixel values from [0, 255] to [0, 1].
datagen = ImageDataGenerator(rescale=1./255)
batch_size = 32

# Directory iterators for the training and validation sets (created in that
# order). Images and their classes are picked up from the directory tree.
# NOTE(review): Keras documents target_size as (height, width); the order
# used here is (width, height), which is harmless while both are 150 --
# confirm before switching to non-square inputs.
train_generator, validation_generator = (
    datagen.flow_from_directory(
        directory,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode='binary')
    for directory in (train_data_dir, validation_data_dir)
)
# Baseline network: three convolution -> ReLU -> 2x2 max-pooling stages,
# followed by a small dense head with dropout and a single sigmoid output
# unit for the binary cat/dog decision.
model = Sequential([
    Convolution2D(32, (3, 3), input_shape=(img_width, img_height, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Convolution2D(32, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Convolution2D(64, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Flatten(),
    Dense(64),
    Activation('relu'),
    Dropout(0.5),
    Dense(1),
    Activation('sigmoid'),
])

# Binary cross-entropy pairs with the single sigmoid output above.
model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
# Training schedule and dataset sizes (number of images on disk).
epochs = 20
train_samples = 2048
validation_samples = 832

# Train the baseline CNN. Step counts are expressed in batches, so the
# sample counts are divided by the batch size.
model.fit_generator(
    train_generator,
    steps_per_epoch=train_samples // batch_size,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=validation_samples // batch_size,)

# The second argument of evaluate_generator is `steps` (a number of
# batches), not a number of samples. Passing validation_samples directly
# would evaluate batch_size times more data than the validation set holds.
model.evaluate_generator(validation_generator, validation_samples // batch_size)

# Keep the weights of the baseline model.
model.save_weights('models/basic_cnn_20_epochs.h5')
# Training-time augmentation: besides rescaling, each image is randomly
# sheared, zoomed and mirrored before being fed to the network.
train_datagen_augmented = ImageDataGenerator(
    horizontal_flip=True,  # random left/right mirroring
    zoom_range=0.2,        # random zoom
    shear_range=0.2,       # random shear
    rescale=1./255)        # scale pixel values to [0, 1]

# Same directory, image size, batch size and label mode as the plain
# training generator; only the ImageDataGenerator differs.
train_generator_augmented = train_datagen_augmented.flow_from_directory(
    directory=train_data_dir,
    class_mode='binary',
    batch_size=batch_size,
    target_size=(img_width, img_height))
# Train on the augmented stream; validation still uses the plain generator.
# NOTE(review): `model` is not re-created before this call, so training
# continues from the weights left by the previous fit -- confirm that this
# is intended.
model.fit_generator(
    train_generator_augmented,
    steps_per_epoch=train_samples // batch_size,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=validation_samples // batch_size,)

# evaluate_generator expects a number of batches (`steps`), not a number of
# samples, so the sample count is divided by the batch size.
model.evaluate_generator(validation_generator, validation_samples // batch_size)

model.save_weights('models/augmented_20_epochs.h5')
# VGG16 with ImageNet weights and without its classifier head; it is used
# below purely as a fixed feature extractor.
model_vgg = applications.VGG16(weights='imagenet', include_top=False)

# Generators for feature extraction (training set first, then validation).
# class_mode=None requests images without labels, and shuffle=False
# disables shuffling so the output order is fixed.
train_generator_bottleneck, validation_generator_bottleneck = (
    datagen.flow_from_directory(
        directory,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode=None,
        shuffle=False)
    for directory in (train_data_dir, validation_data_dir)
)
# Run every image once through the VGG16 base and cache the resulting
# "bottleneck" features on disk. The step counts are numbers of batches.
bottleneck_features_train = model_vgg.predict_generator(
    train_generator_bottleneck, train_samples // batch_size)
# Pass the path itself to np.save: it opens and closes the file on its own,
# whereas a bare open(...) handle would never be closed.
np.save('models/bottleneck_features_train.npy', bottleneck_features_train)

bottleneck_features_validation = model_vgg.predict_generator(
    validation_generator_bottleneck, validation_samples // batch_size)
np.save('models/bottleneck_features_validation.npy', bottleneck_features_validation)
# Reload the cached bottleneck features. Passing the path lets np.load
# manage the file handle instead of leaking one opened with a bare open().
train_data = np.load('models/bottleneck_features_train.npy')
# Labels are rebuilt by position: first half 0, second half 1.
# NOTE(review): this assumes the features are ordered class by class and
# that both classes have the same number of images -- confirm against the
# dataset layout.
train_labels = np.array([0] * (train_samples // 2) + [1] * (train_samples // 2))

validation_data = np.load('models/bottleneck_features_validation.npy')
validation_labels = np.array([0] * (validation_samples // 2) + [1] * (validation_samples // 2))
# Small fully connected classifier trained on the cached bottleneck
# features; its input shape is taken from the feature array itself.
model_top = Sequential([
    Flatten(input_shape=train_data.shape[1:]),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

model_top.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

# Fit on the in-memory feature arrays, validating after every epoch.
model_top.fit(
    train_data,
    train_labels,
    validation_data=(validation_data, validation_labels),
    batch_size=batch_size,
    epochs=epochs,
)
Training this small neural network is very fast: about 2 s per epoch.
# Persist the trained classifier head, then evaluate it on the validation
# bottleneck features.
model_top.save_weights(filepath='models/bottleneck_20_epochs.h5')
model_top.evaluate(x=validation_data, y=validation_labels)
Start by instantiating the VGG base and loading its weights.
# Convolutional base for fine-tuning. The input shape is derived from the
# shared image-size settings rather than hard-coded, so it always matches
# the (img_height, img_width) target_size used by the fine-tuning
# generators.
model_vgg = applications.VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(img_height, img_width, 3))
Build a classifier model to put on top of the convolutional model. For fine-tuning, we start with a fully trained classifier: we reuse the weights from the earlier bottleneck model, and then add this classifier on top of the convolutional base.
# Classifier head with the same layer stack as the bottleneck classifier
# trained earlier, so that its saved weights can be loaded into it.
top_model = Sequential()
top_model.add(Flatten(input_shape=model_vgg.output_shape[1:]))
top_model.add(Dense(256, activation='relu'))
top_model.add(Dropout(0.5))
top_model.add(Dense(1, activation='sigmoid'))

# Load the weights written by the bottleneck training step. The file saved
# there is 'bottleneck_20_epochs.h5'; the previous '30' name pointed at a
# file this script never creates.
top_model.load_weights('models/bottleneck_20_epochs.h5')

# model_vgg is not extended with .add(); the base and the head are joined
# through the functional API instead.
model = Model(inputs=model_vgg.input, outputs=top_model(model_vgg.output))
For fine-tuning, we only want to train a few layers. The loop below sets the first 15 layers (up to the last conv block) to non-trainable.
# Freeze the first 15 layers of the combined model so that only the
# remaining layers are updated during fine-tuning.
# NOTE(review): presumably this covers everything before VGG16's final conv
# block -- confirm with model.summary().
for layer in model.layers[:15]:
    layer.trainable = False

# Compile after changing the trainable flags. Plain SGD with momentum and a
# very small learning rate is used for fine-tuning.
model.compile(
    optimizer=optimizers.SGD(momentum=0.9, lr=1e-4),
    metrics=['accuracy'],
    loss='binary_crossentropy',
)
# Data pipelines for fine-tuning. The training stream is augmented (random
# shear, zoom and horizontal flip); the validation stream is only rescaled.
train_datagen = ImageDataGenerator(
    horizontal_flip=True,
    zoom_range=0.2,
    shear_range=0.2,
    rescale=1./255)

test_datagen = ImageDataGenerator(rescale=1./255)

# These replace the earlier train_generator / validation_generator objects.
train_generator = train_datagen.flow_from_directory(
    directory=train_data_dir,
    class_mode='binary',
    batch_size=batch_size,
    target_size=(img_height, img_width))

validation_generator = test_datagen.flow_from_directory(
    directory=validation_data_dir,
    class_mode='binary',
    batch_size=batch_size,
    target_size=(img_height, img_width))
# Fine-tune the combined model: the unfrozen layers are trained on the
# augmented stream and validated on the plain one. Both step counts are
# numbers of batches.
model.fit_generator(
    generator=train_generator,
    validation_data=validation_generator,
    epochs=epochs,
    steps_per_epoch=train_samples // batch_size,
    validation_steps=validation_samples // batch_size,
)

# Save the fine-tuned weights.
model.save_weights(filepath='models/finetuning_20epochs_vgg.h5')
Compute the loss and accuracy on the validation set:
# evaluate_generator expects a number of batches (`steps`), not a number of
# samples, so the sample count is divided by the batch size. This matches
# the validation_steps used during training.
model.evaluate_generator(validation_generator, validation_samples // batch_size)