ResNetを使ってみよう！

すごく久しぶりの投稿です。
5月から所属部署の変更があり、バタバタしてましたｗ(今もですが)

せっかく前回、Machine Learning のHello World をやったので、流行り(？)の ResNet を使ってみましょう。

rupic.hatenablog.com

ResNet

ResNet (Residual Network) は Microsoft Research の Kaiming He さんが2015年に考案したニューラルネットワークのモデルらしい。

画像認識の分野でトップを争う ImageNet コンペティション(ILSVRC)において、2014年に1位だった GoogLeNet は 22層。「GoogLeNet スゲー」ってなってたところに、翌2015年にはまさかの 152層の ResNet が出てきて(ﾟдﾟ)フォォォァァァァァァァォァオァオァオアオアォアォアア！ってなったみたいです。

ResNet の特徴

とにかく層が深い
　→ いろんな論文にも書かれている通り、CNN では層の深さが学習においてとても大事らしい。
ショートカット用のルートがある
　→ すごく長いニューラルネットワークの入力層に近いノードでは、色々あって勾配消失が発生し学習が止まったり、速度が低下したりしてしまうらしく、それを解決する為の方法との事。

ResNet の弱点

すごく強いマシンが欲しい
　→ 我が家のマシンは GPU NVIDIA Quadro P6000 / メモリ 64GB ですが、それでも入力が大きいと 152層はしんどいです。
学習に時間がかかる
　→ 11クラス、152層、50 Epoch とかでも5～6時間くらいかかる。
　　待っている間は何もしたくなくなる事もあるかもしれない。

en.wikipedia.org

とりあえず、メモ程度ですが学習用のコードを...。
参考にさせて頂いた無限ノック。

github.com

main.py

from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf
from tensorflow.keras.models import Sequential,Model
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D, Input, BatchNormalization, concatenate, AveragePooling2D, Add, SeparableConv2D
from tensorflow.keras import optimizers
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler, ReduceLROnPlateau, CSVLogger, TensorBoard
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array

# 好きなモデルを呼び出す
import ResNet152 as resnet

import seaborn as sns
import csv
import os
import numpy as np
import matplotlib.pyplot as plt
import json
import collections as cl
import argparse
import codecs
from datetime import datetime

def PrintLog(log):
    """Append *log* to ./Log/print.log (UTF-8) and echo it to stdout.

    Args:
        log: Any object; it is rendered the way ``print`` renders it.
    """
    # Create the log folder on demand so the first call works on a fresh checkout.
    os.makedirs('./Log', exist_ok=True)
    # Use a context manager so the handle is flushed and closed on every call
    # instead of being left for the garbage collector.
    # newline='' writes line endings untranslated, as the binary-mode
    # codecs writer used previously did.
    with open('./Log/print.log', 'a', encoding='utf-8', newline='') as log_file:
        print(log, file=log_file)
    print(log)

# Lower the verbosity of TensorFlow's native (C++) logging.
# NOTE(review): this variable is usually expected to be set before
# TensorFlow is imported - confirm it still takes effect at this point.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Enable on-demand GPU memory growth on every GPU TensorFlow can see and
# print the resulting setting for each device.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if not physical_devices:
    print("Not enough GPU hardware devices available")
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)
    print('memory growth:', tf.config.experimental.get_memory_growth(gpu))


# Root folder of the dataset; it must contain 'train' and 'validation'.
PATH = r'D:\datasets\original'

train_dir = os.path.join(PATH, 'train')
validation_dir = os.path.join(PATH, 'validation')

# Each class corresponds to a sub-directory of the same name under both
# train_dir and validation_dir.
class_names = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K']
class_names.sort()

# Per-class directories, in class_names order.
train_dirs = [os.path.join(train_dir, name) for name in class_names]
validation_dirs = [os.path.join(validation_dir, name) for name in class_names]

# Per-class entry counts. Every directory entry is counted, whatever its type.
num_tr = [len(os.listdir(class_dir)) for class_dir in train_dirs]
num_val = [len(os.listdir(class_dir)) for class_dir in validation_dirs]

total_train = sum(num_tr)
total_val = sum(num_val)

PrintLog("--images-----------------------")
for class_name, train_count, val_count in zip(class_names, num_tr, num_val):
    PrintLog('class : {} images'.format(class_name))
    PrintLog('        total training   {}'.format(train_count))
    PrintLog('        total validation {}'.format(val_count))


PrintLog("-------------------------------")
PrintLog("Total : images")
# Format the totals as text: passing a (label, value) tuple to PrintLog
# logged the tuple's repr instead of a readable line.
PrintLog('        training   {}'.format(total_train))
PrintLog('        validation {}'.format(total_val))


# Number of output classes, derived from class_names so that the class list
# and the size of the network's output layer cannot drift apart.
num_classes = len(class_names)
# Number of colour channels per input image.
channel = 3
# Images per training / validation batch.
batch_size = 11

# Number of training epochs.
epochs = 50
# Width and height (pixels) the images are resized to before entering the model.
IMG_WIDTH = 1080
IMG_HEIGHT = 1080


# Cut a crop_width x crop_height window out of the middle of an image.
def crop_center(pil_img, crop_width, crop_height):
    """Return the centred crop of *pil_img*.

    Args:
        pil_img: Object exposing ``size`` as (width, height) and a
            ``crop(box)`` method taking a (left, upper, right, lower) tuple.
        crop_width: Width of the crop window.
        crop_height: Height of the crop window.

    Returns:
        Whatever ``pil_img.crop`` returns for the centred box.
    """
    img_width, img_height = pil_img.size
    left = (img_width - crop_width) // 2
    upper = (img_height - crop_height) // 2
    right = (img_width + crop_width) // 2
    lower = (img_height + crop_height) // 2
    return pil_img.crop((left, upper, right, lower))

# Crop the largest centred square, whose side is the image's shorter edge.
def crop_max_square(pil_img):
    """Return the centred square crop of *pil_img* with side ``min(pil_img.size)``."""
    side = min(pil_img.size)
    return crop_center(pil_img, side, side)

def preprocess(x):
    """Centre-crop one image array to a square and return it as an array.

    The array is converted to an image, cropped with ``crop_max_square`` and
    converted back with ``img_to_array``.

    NOTE(review): ``array_to_img`` is called with its defaults, which
    presumably rescale pixel values to the 0-255 range per image - confirm
    this is intended.

    Args:
        x: A single image as an array accepted by ``array_to_img``.

    Returns:
        The cropped image as returned by ``img_to_array``.
    """
    return img_to_array(crop_max_square(array_to_img(x)))

# Training generator: random augmentation on top of the square-crop preprocessing.
train_augmentation = dict(
    preprocessing_function=preprocess,
    rescale=1./255,
    rotation_range=5,
    width_shift_range=.15,
    height_shift_range=.15,
    zoom_range=0.5,
    brightness_range=[0.3,1.0],
)
train_image_generator = ImageDataGenerator(**train_augmentation)

# Per-channel mean stored on the generator, shaped (1, 1, 3).
# NOTE(review): the generator is built without featurewise_center, so this
# mean is presumably never applied - confirm.
train_image_generator.mean = np.array([[[123.68/255, 116.779/255, 103.939/255]]], dtype=np.float32)

# Stream shuffled training batches from train_dir, resized to the model input size.
train_data_gen = train_image_generator.flow_from_directory(
    directory=train_dir,
    classes=class_names,
    class_mode='categorical',
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=batch_size,
    shuffle=True,
)


# Validation generator: same rescale and crop preprocessing, no augmentation.
validation_image_generator = ImageDataGenerator(preprocessing_function=preprocess, rescale=1./255)
validation_image_generator.mean = np.array([[[123.68/255, 116.779/255, 103.939/255]]], dtype=np.float32)

# Validation batches are not shuffled, so their order stays fixed.
val_data_gen = validation_image_generator.flow_from_directory(
    directory=validation_dir,
    classes=class_names,
    class_mode='categorical',
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=batch_size,
    shuffle=False,
)

# Write the class-name -> index mapping used by the training generator to JSON.
# Create the output folder first: nothing earlier in this script creates
# './model', so opening the file below would fail on a fresh checkout.
os.makedirs('./model', exist_ok=True)

dic = cl.OrderedDict()
for class_name, class_id in train_data_gen.class_indices.items():
    dic[class_name] = cl.OrderedDict({"index": class_id, "name": class_name})

with open('./model/class_map.json','w') as fw:
    json.dump(dic,fw,indent=2)


# Build the model for the configured input size, channel count and class count.
model = resnet.ResFunction(IMG_HEIGHT, IMG_WIDTH, channel, num_classes)

# Step-wise learning-rate schedule.
def lr_schedule(epoch):
    """Return the learning rate for *epoch* and log it via ``PrintLog``.

    The base rate is 1e-3. It is scaled by 1e-1 once ``epoch`` exceeds 19 and
    by a further factor of ten after each of 24, 29, 34, 39 and 44.

    Args:
        epoch: Epoch index; the log line reports ``epoch + 1``.

    Returns:
        The learning rate as a float.
    """
    lr = 1e-3

    # (epoch bound, multiplier), highest bound first; only the first match applies.
    steps = (
        (44, 1e-6),
        (39, 1e-5),
        (34, 1e-4),
        (29, 1e-3),
        (24, 1e-2),
        (19, 1e-1),
    )
    for bound, factor in steps:
        if epoch > bound:
            lr *= factor
            break

    PrintLog('Next Epoc: {}, Learning rate: {}'.format(epoch + 1, lr))
    return lr


# Compile with Nesterov-momentum SGD and categorical cross-entropy loss.
# 'accuracy' is passed as a metric so training and validation accuracy are
# reported for every epoch.
decay = 1e-6
momentum = 9e-1

# The initial learning rate is the schedule's value for epoch 0
# (calling lr_schedule also writes one log line).
sgd = optimizers.SGD(lr_schedule(0), momentum=momentum, decay=decay, nesterov=True)
model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              metrics=['accuracy'])

# Append the model summary to a log file, one summary line per write.
with open('./Log/network_layer', 'a') as f:
    model.summary(print_fn=lambda line: f.write(line + '\r\n'))

# Training callbacks.
#
# The model is trained with model.fit on the generators; the callbacks below
# save a checkpoint per epoch, adjust the learning rate and log the metrics.
save_dir = os.path.join(os.getcwd(), 'saved_models')
os.makedirs(save_dir, exist_ok=True)

# '{epoch:03d}' is left in the path for ModelCheckpoint to fill in.
model_name = 'ResNetv152_model.{epoch:03d}.h5'
filepath = os.path.join(save_dir, model_name)

# save_best_only=False: do not restrict saving to the best epoch.
checkpoint = ModelCheckpoint(
    filepath=filepath,
    monitor='val_accuracy',
    verbose=1,
    save_best_only=False,
)

# Learning rate comes from lr_schedule.
lr_scheduler = LearningRateScheduler(lr_schedule)

# Append each epoch's results to a CSV log.
csv_logger = CSVLogger(
    filename='./Log/training.log',
    separator=',',
    append=True,
)

callbacks = [checkpoint, lr_scheduler, csv_logger]

# Train. Step counts use floor division, so a final partial batch is not
# included in the per-epoch step count.
history = model.fit(
    train_data_gen,
    steps_per_epoch=total_train // batch_size,
    epochs=epochs,
    callbacks=callbacks,
    validation_data=val_data_gen,
    validation_steps=total_val // batch_size,
)

# Save the trained model.
model.save('./model')

# Confusion matrix on the validation set.
# Predicted class = index of the largest score in each prediction row.
# val_data_gen was created with shuffle=False; the predictions are compared
# against val_data_gen.classes.
Y_pred = model.predict(val_data_gen)
y_pred = Y_pred.argmax(axis=1)

PrintLog('\r\nConfusion Matrix\r\n')

cm = tf.math.confusion_matrix(val_data_gen.classes, y_pred)
PrintLog(cm)

# Draw the matrix as an annotated heat map and save it as an image.
sns.heatmap(cm, cmap='Blues', annot=True, fmt='d', cbar=False)
plt.savefig('./model/confusion_matrix.png')

# Plot the training history: accuracy on the left, loss on the right.
epochs_range = range(epochs)

acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

plt.figure(figsize=(8, 8))

# (subplot position, training curve, training label, validation curve,
#  validation label, legend location, title)
panels = (
    (1, acc, 'Training Accuracy', val_acc, 'Validation Accuracy',
     'lower right', 'Training and Validation Accuracy'),
    (2, loss, 'Training Loss', val_loss, 'Validation Loss',
     'upper right', 'Training and Validation Loss'),
)
for position, train_curve, train_label, val_curve, val_label, legend_loc, title in panels:
    plt.subplot(1, 2, position)
    plt.plot(epochs_range, train_curve, label=train_label)
    plt.plot(epochs_range, val_curve, label=val_label)
    plt.legend(loc=legend_loc)
    plt.title(title)

plt.savefig('./model/figure.png')
plt.show()

ResNet152.py

from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf
from tensorflow.keras.models import Sequential,Model
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D, Input, BatchNormalization, concatenate, GlobalAveragePooling2D, Add, SeparableConv2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def ResFunction(IMG_HEIGHT, IMG_WIDTH, channel, num_classes):
    """Build a bottleneck residual CNN classifier.

    The network is a strided 7x7 convolution stem followed by four groups of
    3, 8, 36 and 3 residual blocks, global average pooling and a softmax
    layer.

    Args:
        IMG_HEIGHT: Input image height.
        IMG_WIDTH: Input image width.
        channel: Number of input channels.
        num_classes: Number of units in the final softmax layer.

    Returns:
        An uncompiled ``Model`` taking ``(IMG_HEIGHT, IMG_WIDTH, channel)``
        inputs and producing ``num_classes`` softmax outputs.
    """

    def conv_bn_relu(tensor, filters, kernel, strides, conv_name, bn_name):
        # Convolution -> batch normalisation -> ReLU, with explicit layer names
        # for the convolution and the normalisation.
        tensor = Conv2D(filters, kernel, strides=strides, padding='same',
                        activation=None, name=conv_name)(tensor)
        tensor = BatchNormalization(name=bn_name)(tensor)
        return Activation("relu")(tensor)

    def ResBlock(x, in_f, f_1, out_f, stride=1, name="res"):
        # Main branch: 1x1 (carries the stride) -> 3x3 -> 1x1 convolutions,
        # each followed by batch normalisation and ReLU.
        branch = conv_bn_relu(x, f_1, [1, 1], stride, name + "_conv1", name + "_bn1")
        branch = conv_bn_relu(branch, f_1, [3, 3], 1, name + "_conv2", name + "_bn2")
        branch = conv_bn_relu(branch, out_f, [1, 1], 1, name + "_conv3", name + "_bn3")

        # Shortcut branch: project with a 1x1 convolution when the channel
        # count changes, and max-pool when the main branch used stride 2,
        # so both tensors can be added.
        shortcut = x
        if in_f != out_f:
            shortcut = conv_bn_relu(shortcut, out_f, [1, 1], 1,
                                    name + "_conv_sc", name + "_bn_sc")
        if stride == 2:
            shortcut = MaxPooling2D([2, 2], strides=2, padding="same")(shortcut)

        merged = Add()([branch, shortcut])
        return Activation("relu")(merged)

    inputs = Input((IMG_HEIGHT, IMG_WIDTH, channel))

    # Stem: 7x7 convolution with stride 3, then 5x5 max pooling with stride 3.
    x = conv_bn_relu(inputs, 64, [7, 7], 3, "conv1", "bn1")
    x = MaxPooling2D([5, 5], strides=3, padding='same')(x)

    # Group 2: three blocks, 256 output channels.
    x = ResBlock(x, 64, 64, 256, name="res2_1")
    x = ResBlock(x, 256, 64, 256, name="res2_2")
    x = ResBlock(x, 256, 64, 256, name="res2_3")

    # Group 3: one stride-2 block plus seven repeats, 512 output channels.
    x = ResBlock(x, 256, 128, 512, stride=2, name="res3_1")
    for block_no in range(2, 9):
        x = ResBlock(x, 512, 128, 512, name="res3_{}".format(block_no))

    # Group 4: one stride-2 block plus thirty-five repeats, 1024 output channels.
    x = ResBlock(x, 512, 256, 1024, stride=2, name="res4_1")
    for block_no in range(2, 37):
        x = ResBlock(x, 1024, 256, 1024, name="res4_{}".format(block_no))

    # Group 5: three blocks, 2048 output channels.
    # NOTE(review): res5_2 / res5_3 use 256 inner filters while res5_1 uses
    # 512 - confirm this is intentional.
    x = ResBlock(x, 1024, 512, 2048, stride=2, name="res5_1")
    x = ResBlock(x, 2048, 256, 2048, name="res5_2")
    x = ResBlock(x, 2048, 256, 2048, name="res5_3")

    # Classification head: global average pooling, then softmax over the classes.
    x = GlobalAveragePooling2D()(x)
    x = Dense(num_classes, activation='softmax', name="fc")(x)

    return Model(inputs=inputs, outputs=x)

るぴブロ

備忘録とかです('ω')