ResNetを使ってみよう!
すごく久しぶりの投稿です。
5月から所属部署の変更があり、バタバタしてましたw(今もですが)
せっかく前回、Machine Learning のHello World をやったので、流行り(?)の ResNet を使ってみましょう。
ResNet
ResNet (Residual Network) は Microsoft Research の Kaiming He さんが2015年に考案したニューラルネットワークのモデルらしい。
2014年の画像認識の分野でトップを争う ImageNet コンペティションにおいて、1位だった GoogLeNet は 22層。ところが、翌年には GoogLeNet スゲーってなってたとこに、まさかの 152層 が出てきて(゚д゚)フォォォァァァァァァァォァオァオァオアオアォアォアア!ってなったみたいです。
ResNet の特徴
- とにかく層が深い
→ いろんな論文にも書かれている通り、CNN では学習における層の深さはとても大事らしい。
- ショートカット用のルートがある
→ すごく長いニューラルネットワークの入力層に近いノードでは、色々あって勾配消失が発生し学習が止まったり、速度が低下したりしてしまうらしく、それを解決する為の方法との事。
ResNet の弱点
- すごく強いマシンが欲しい
→ 我が家のマシンは GPU NVIDIA Quadro P6000 / メモリ 64GB ですが、それでも入力が大きいと 152層はしんどいです。
- 学習に時間がかかる
→ 11 クラス、152層、50 Epoch とかでも5~6時間くらいかかる。
待っている間は何もしたくなくなる事もあるかもしれない。
とりあえず、メモ程度ですが学習用のコードを...。
参考にさせて頂いた無限ノック。
main.py
from __future__ import absolute_import, division, print_function, unicode_literals import tensorflow as tf from tensorflow.keras.models import Sequential,Model from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D, Input, BatchNormalization, concatenate, AveragePooling2D, Add, SeparableConv2D from tensorflow.keras import optimizers from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler, ReduceLROnPlateau, CSVLogger, TensorBoard from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array # 好きなモデルを呼び出す import ResNet152 as resnet import seaborn as sns import csv import os import numpy as np import matplotlib.pyplot as plt import json import collections as cl import argparse import codecs from datetime import datetime def PrintLog(log): print(log,file=codecs.open('./Log/print.log','a','utf-8')) print(log) os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' physical_devices = tf.config.experimental.list_physical_devices('GPU') if len(physical_devices) > 0: for k in range(len(physical_devices)): tf.config.experimental.set_memory_growth(physical_devices[k], True) print('memory growth:', tf.config.experimental.get_memory_growth(physical_devices[k])) else: print("Not enough GPU hardware devices available") PATH = r'D:\datasets\original' train_dir = os.path.join(PATH, 'train') validation_dir = os.path.join(PATH, 'validation') # 学習用のサブディレクトリがクラスになる class_names = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K'] class_names.sort() train_dirs = [] validation_dirs = [] for name in class_names: train_dirs.append(os.path.join(train_dir, name)), validation_dirs.append(os.path.join(validation_dir, name)) num_tr = [] for trdir in train_dirs: num_tr.append(len(os.listdir(trdir))) num_val = [] for valdir in validation_dirs: num_val.append(len(os.listdir(valdir))) total_train = 0 for numtr in num_tr: total_train += numtr total_val = 0 for numval in num_val: total_val += numval 
# main.py (part 2): dataset summary, data generators, model construction,
# training, and evaluation/plots.

# Output directories are written to below; create them up front.
for out_dir in ('./Log', './model'):
    os.makedirs(out_dir, exist_ok=True)

PrintLog("--images-----------------------")
for class_name, train_count, val_count in zip(class_names, num_tr, num_val):
    PrintLog('class : {} images'.format(class_name))
    PrintLog(' total training {}'.format(train_count))
    PrintLog(' total validation {}'.format(val_count))
PrintLog("-------------------------------")
PrintLog("Total : images")
# Format the totals as text; passing a tuple would log its repr instead.
PrintLog(" training {}".format(total_train))
PrintLog(" validation {}".format(total_val))

# Derive the class count from the class list so the two cannot drift apart.
num_classes = len(class_names)
channel = 3
batch_size = 11
epochs = 50
IMG_WIDTH = 1080
IMG_HEIGHT = 1080


def crop_center(pil_img, crop_width, crop_height):
    """Return the centered crop_width x crop_height region of *pil_img*."""
    img_width, img_height = pil_img.size
    return pil_img.crop(((img_width - crop_width) // 2,
                         (img_height - crop_height) // 2,
                         (img_width + crop_width) // 2,
                         (img_height + crop_height) // 2))


def crop_max_square(pil_img):
    """Return the largest centered square of *pil_img* (side = shorter edge)."""
    return crop_center(pil_img, min(pil_img.size), min(pil_img.size))


def preprocess(x):
    """Convert array *x* to an image, center-crop it to a square, and return an array.

    NOTE(review): flow_from_directory below is given target_size, so the
    generator presumably resizes images before this function runs - confirm
    the crop still has the intended effect.
    """
    im = array_to_img(x)
    im_new = crop_max_square(im)
    return img_to_array(im_new)


# Training generator with data augmentation.
train_image_generator = ImageDataGenerator(
    rescale=1./255,
    rotation_range=5,
    width_shift_range=.15,
    height_shift_range=.15,
    zoom_range=0.5,
    brightness_range=[0.3, 1.0],
    preprocessing_function=preprocess
)
# Per-channel mean on the 0-1 scale.
# NOTE(review): featurewise_center is not enabled in the constructor above -
# confirm against the ImageDataGenerator docs whether this mean is applied.
train_image_generator.mean = np.array([123.68/255, 116.779/255, 103.939/255], dtype=np.float32).reshape((1, 1, 3))
# Read training images from disk, resized to the network input size.
train_data_gen = train_image_generator.flow_from_directory(batch_size=batch_size,
                                                           directory=train_dir,
                                                           shuffle=True,
                                                           target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                           classes=class_names,
                                                           class_mode='categorical')

# Validation generator: rescale and crop only, no augmentation.
validation_image_generator = ImageDataGenerator(rescale=1./255, preprocessing_function=preprocess)
# NOTE(review): same caveat as for the training generator's mean.
validation_image_generator.mean = np.array([123.68/255, 116.779/255, 103.939/255], dtype=np.float32).reshape((1, 1, 3))
# shuffle=False keeps prediction order aligned with val_data_gen.classes.
val_data_gen = validation_image_generator.flow_from_directory(batch_size=batch_size,
                                                              directory=validation_dir,
                                                              shuffle=False,
                                                              target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                              classes=class_names,
                                                              class_mode='categorical')

# Write the class-name -> index mapping to JSON.
dic = cl.OrderedDict()
for class_label, class_id in train_data_gen.class_indices.items():
    dic[class_label] = cl.OrderedDict({"index": class_id, "name": class_label})
with open('./model/class_map.json', 'w', encoding='utf-8') as fw:
    json.dump(dic, fw, indent=2)

# Build the model.
model = resnet.ResFunction(IMG_HEIGHT, IMG_WIDTH, channel, num_classes)


def lr_schedule(epoch):
    """Return the learning rate for the given 0-based *epoch* and log it.

    Starts at 1e-3 and is scaled down by a further factor of ten every five
    epochs once the epoch index exceeds 19.
    """
    lr = 1e-3
    if epoch > 44:
        lr *= 1e-6
    elif epoch > 39:
        lr *= 1e-5
    elif epoch > 34:
        lr *= 1e-4
    elif epoch > 29:
        lr *= 1e-3
    elif epoch > 24:
        lr *= 1e-2
    elif epoch > 19:
        lr *= 1e-1
    PrintLog('Next Epoc: {}, Learning rate: {}'.format(epoch + 1, lr))
    return lr


# SGD with Nesterov momentum and categorical cross-entropy loss.
# The metrics argument makes each epoch report training/validation accuracy.
decay = 1e-6
momentum = 9e-1
model.compile(optimizer=optimizers.SGD(lr_schedule(0), momentum=momentum, decay=decay, nesterov=True),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Dump the network layer summary to a file for later inspection.
with open('./Log/network_layer', 'a') as f:
    model.summary(print_fn=lambda x: f.write(x + '\r\n'))

# Train the model with model.fit on the generators.
# Callbacks: save a checkpoint every epoch, apply the learning-rate schedule,
# and log per-epoch results to CSV.
save_dir = os.path.join(os.getcwd(), 'saved_models')
model_name = 'ResNetv152_model.{epoch:03d}.h5'
os.makedirs(save_dir, exist_ok=True)
filepath = os.path.join(save_dir, model_name)
checkpoint = ModelCheckpoint(filepath=filepath,
                             monitor='val_accuracy',
                             verbose=1,
                             save_best_only=False)
lr_scheduler = LearningRateScheduler(lr_schedule)
# Save each epoch's results to CSV.
csv_logger = CSVLogger(filename='./Log/training.log', separator=',', append=True)
callbacks = [checkpoint, lr_scheduler, csv_logger]

history = model.fit(
    train_data_gen,
    epochs=epochs,
    validation_data=val_data_gen,
    validation_steps=total_val // batch_size,
    steps_per_epoch=total_train // batch_size,
    callbacks=callbacks
)

# Save the final model.
model.save('./model')

# Confusion matrix on the validation set.
Y_pred = model.predict(val_data_gen)
y_pred = np.argmax(Y_pred, axis=1)
PrintLog('\r\nConfusion Matrix\r\n')
cm = tf.math.confusion_matrix(val_data_gen.classes, y_pred)
PrintLog(cm)
sns.heatmap(cm, annot=True, fmt='d', cbar=False, cmap='Blues')
plt.savefig('./model/confusion_matrix.png')

# Visualize the training history.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
# Use the recorded history length so the x-axis always matches the data.
epochs_range = range(len(acc))

plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.savefig('./model/figure.png')
plt.show()
ResNet152.py
# ResNet152.py: Keras functional-API definition of the residual network
# used by main.py.
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D, Input, BatchNormalization, concatenate, GlobalAveragePooling2D, Add, SeparableConv2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator


def ResFunction(IMG_HEIGHT, IMG_WIDTH, channel, num_classes):
    """Build the residual network and return it as an uncompiled Keras Model.

    The network is a strided 7x7 stem, four stages of bottleneck residual
    blocks (3, 8, 36 and 3 blocks), global average pooling and a softmax
    classifier.

    Args:
        IMG_HEIGHT: Input image height.
        IMG_WIDTH: Input image width.
        channel: Number of input channels.
        num_classes: Number of units in the final softmax layer.

    Returns:
        A ``tensorflow.keras.models.Model`` mapping the input to class
        probabilities.
    """

    def conv_bn_relu(tensor, filters, kernel, strides, conv_name, bn_name):
        # Convolution -> batch normalization -> ReLU, the unit every branch
        # of the network is assembled from.
        tensor = Conv2D(filters, kernel, strides=strides, padding='same',
                        activation=None, name=conv_name)(tensor)
        tensor = BatchNormalization(name=bn_name)(tensor)
        return Activation("relu")(tensor)

    def ResBlock(x, in_f, f_1, out_f, stride=1, name="res"):
        # Residual branch: 1x1 (optionally strided) -> 3x3 -> 1x1.
        branch = conv_bn_relu(x, f_1, [1, 1], stride, name + "_conv1", name + "_bn1")
        branch = conv_bn_relu(branch, f_1, [3, 3], 1, name + "_conv2", name + "_bn2")
        branch = conv_bn_relu(branch, out_f, [1, 1], 1, name + "_conv3", name + "_bn3")

        # Shortcut branch: project when the channel count changes, and pool
        # when the residual branch was strided so that shapes match.
        shortcut = x
        if in_f != out_f:
            shortcut = conv_bn_relu(shortcut, out_f, [1, 1], 1, name + "_conv_sc", name + "_bn_sc")
        if stride == 2:
            shortcut = MaxPooling2D([2, 2], strides=2, padding="same")(shortcut)

        merged = Add()([branch, shortcut])
        return Activation("relu")(merged)

    inputs = Input((IMG_HEIGHT, IMG_WIDTH, channel))

    # Stem.
    net = conv_bn_relu(inputs, 64, [7, 7], 3, "conv1", "bn1")
    net = MaxPooling2D([5, 5], strides=3, padding='same')(net)

    # Stage 2: three blocks.
    net = ResBlock(net, 64, 64, 256, name="res2_1")
    for block_no in (2, 3):
        net = ResBlock(net, 256, 64, 256, name="res2_{}".format(block_no))

    # Stage 3: one downsampling block followed by seven more.
    net = ResBlock(net, 256, 128, 512, stride=2, name="res3_1")
    for block_no in range(2, 9):
        net = ResBlock(net, 512, 128, 512, name="res3_{}".format(block_no))

    # Stage 4: one downsampling block followed by thirty-five more.
    net = ResBlock(net, 512, 256, 1024, stride=2, name="res4_1")
    for block_no in range(2, 37):
        net = ResBlock(net, 1024, 256, 1024, name="res4_{}".format(block_no))

    # Stage 5: one downsampling block followed by two more.
    # NOTE(review): res5_2/res5_3 use a 256-filter bottleneck while res5_1
    # uses 512 - confirm this is intended.
    net = ResBlock(net, 1024, 512, 2048, stride=2, name="res5_1")
    for block_no in (2, 3):
        net = ResBlock(net, 2048, 256, 2048, name="res5_{}".format(block_no))

    # Collapse the spatial dimensions of the previous layer's output, then
    # classify.
    net = GlobalAveragePooling2D()(net)
    outputs = Dense(num_classes, activation='softmax', name="fc")(net)

    return Model(inputs=inputs, outputs=outputs)