안녕하세요.
여기로모여! 입니다.
오늘은 제가 요즘 다루고 있는 주제인 딥러닝에 대해 써 보겠습니다.
바로 동물 분류기입니다.
테스트셋과 데이터셋의 폴더 경로만 본인 환경에 맞게 바꿔 주시면 좋은 결과를 얻으실 수 있습니다.
바로 시작해보겠습니다.
import numpy as np
import pandas as pd
import os
import cv2
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook as tqdm
from sklearn.utils import class_weight, shuffle
from keras.models import load_model
from keras import applications
from keras import optimizers
from keras.utils import to_categorical
from keras.models import Sequential, Model, load_model
from keras.layers import Dropout, Flatten, Dense
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint
import tensorflow as tf
tf.__version__
# Training set -- the dataset path below must be adjusted to your machine!
# Every sub-folder of the dataset root is treated as one class; its position
# in the directory listing becomes the integer category label.
foldernames = os.listdir('D:/Capstone/animals10/raw-img')
categories = []
files = []
for k, folder in enumerate(foldernames):
    filenames = os.listdir("D:/Capstone/animals10/raw-img/" + folder)
    for file in filenames:
        # Paths are joined with "/" on purpose: later code recovers the class
        # folder name with path.split('/')[-2].
        files.append("D:/Capstone/animals10/raw-img/" + folder + "/" + file)
        categories.append(k)
df = pd.DataFrame({
    'filename': files,
    'category': categories
})
# Keep at most the first 500 files of each of the 10 classes.
# DataFrame.append was removed in pandas 2.0, so the per-class slices are
# combined with a single pd.concat instead of appending inside a loop.
train_df = pd.concat(
    [df[df.category == i].iloc[:500, :] for i in range(10)]
)
train_df.head()
# Renumber rows 0..n-1 so a row's index matches its position in `images`.
train_df = train_df.reset_index(drop=True)
train_df
# Test set -- the folder path below must be adjusted to your machine!
# The test folder is read as a flat list of image files, so every file gets
# the placeholder category 0 (no ground-truth label is available).
k = 0
categories2 = []
files2 = []
filenames2 = os.listdir('D:/Capstone/test')
for file2 in filenames2:
    files2.append("D:/Capstone/test/" + file2)
    categories2.append(k)
test_df = pd.DataFrame({
    'filename': files2,
    'category': categories2
})
# NOTE: the earlier version rebound `df` to an empty frame here and then
# appended ten empty selections of it to test_df. That added no rows, relied on
# DataFrame.append (removed in pandas 2.0) and overwrote the training `df`,
# so the loop has been dropped.
test_df.head()
test_df = test_df.reset_index(drop=True)
test_df
# Take the file paths and their labels from the training frame and shuffle
# the two columns consistently; random_state pins the resulting order.
x, y = shuffle(
    train_df['filename'],
    train_df['category'],
    random_state=8,
)
def centering_image(img, size=(256, 256)):
    """Paste *img* into the centre of a black canvas.

    Args:
        img: Image array of shape (height, width, channels). It must not be
            larger than the canvas in either dimension.
        size: Canvas size as (height, width). Defaults to 256 x 256, the
            value that used to be hard-coded.

    Returns:
        A uint8 array of shape (size[0], size[1], channels) that is zero
        everywhere except for the centred copy of *img*.

    Raises:
        ValueError: If *img* does not fit inside the canvas.
    """
    canvas_h, canvas_w = size
    img_h, img_w = img.shape[:2]
    if img_h > canvas_h or img_w > canvas_w:
        raise ValueError(
            "image of size {}x{} does not fit into a {}x{} canvas".format(
                img_h, img_w, canvas_h, canvas_w
            )
        )
    # Offsets of the top-left corner; rows pair with the canvas height and
    # columns with the canvas width, so non-square canvases work too.
    row = (canvas_h - img_h) // 2
    col = (canvas_w - img_w) // 2
    resized = np.zeros((canvas_h, canvas_w, img.shape[2]), dtype=np.uint8)
    resized[row:row + img_h, col:col + img_w] = img
    return resized
def _load_image(file_path):
    """Read one image file and return it as a 224x224 RGB uint8 array.

    The image is scaled so its longer side becomes 256 pixels, centred on a
    256x256 black canvas by centering_image, and then cropped to the central
    224x224 region.

    Raises:
        OSError: If OpenCV cannot read or decode *file_path*.
    """
    img = cv2.imread(file_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        # Skipping the file would shift every later image relative to its
        # DataFrame row, so fail loudly and name the offending file.
        raise OSError("could not read image file: {}".format(file_path))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    height, width = img.shape[:2]
    # cv2.resize takes dsize as (width, height). max(1, ...) keeps an extreme
    # aspect ratio from producing a zero-pixel side.
    if height > width:
        tile_size = (max(1, int(width * 256 / height)), 256)
    else:
        tile_size = (256, max(1, int(height * 256 / width)))
    img = centering_image(cv2.resize(img, dsize=tile_size))
    # Output is 224x224 px: the centre crop of the 256x256 canvas.
    return img[16:240, 16:240]


def _load_images(file_paths):
    """Load every path in *file_paths* with _load_image, showing a progress bar.

    Returns:
        A numpy array stacking the loaded images in the order of *file_paths*.
    """
    loaded = []
    with tqdm(total=len(file_paths)) as pbar:
        for file_path in file_paths:
            loaded.append(_load_image(file_path))
            pbar.update(1)
    return np.array(loaded)


# images[k] / images2[k] correspond to row k of train_df / test_df.
images = _load_images(train_df.filename.values)
print(train_df.filename.values[0])
print(test_df.filename.values[0])
print(images)
images2 = _load_images(test_df.filename.values)
print(images2)
# Draw one sample per class in a 2x5 grid: the third row of each category,
# titled with its class folder name followed by the category number.
rows, cols = 2, 5
fig, axes = plt.subplots(nrows=rows, ncols=cols, figsize=(20, 20))
for i in range(10):
    sample_file, sample_category = train_df[train_df.category == i].values[2]
    ax = axes[divmod(i, cols)]
    ax.set_title(sample_file.split('/')[-2] + str(sample_category))
    # The row index of the file in train_df is also its position in `images`.
    image_pos = train_df[train_df.filename == sample_file].index[0]
    ax.imshow(images[image_pos])
# Shuffle images and labels with one shared random permutation.
data_num = len(y)
random_index = np.random.permutation(data_num)
# `y` is indexed with the same numbers that index `images`, so both stay
# paired after the permutation.
x = images[random_index]
y = np.array([y[idx] for idx in random_index])

# Hold out the first 20% of the shuffled data for validation.
val_split_num = int(round(0.2 * len(y)))
x_train = x[val_split_num:]
y_train = y[val_split_num:]
x_test = x[:val_split_num]
y_test = y[:val_split_num]
print('x_train', x_train.shape)
print('y_train', y_train.shape)
print('x_test', x_test.shape)
print('y_test', y_test.shape)

# One-hot encode with a fixed width of 10 so both splits always have the same
# number of columns, even if one split happens to miss the highest class.
y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)

# Scale pixel values from 0..255 to 0..1.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
# Input dimensions of the network (height, width, channels).
img_rows, img_cols, img_channel = 224, 224, 3

# English names for the Italian class-folder names handled by this script.
_FOLDER_TO_ENGLISH = {
    'scoiattolo': 'squirrel',
    'cavallo': 'horse',
    'farfalla': 'butterfly',
    'mucca': 'cow',
    'gatto': 'cat',
    'pecora': 'sheep',
    'gallina': 'chicken',
    'elefante': 'elephant',
    'ragno': 'spider',
    'cane': 'dog',
}

# name_animal[i] is the display name of category i.
name_animal = []
for i in range(10):
    class_files = train_df[train_df.category == i].filename.values
    # Every file of a category lives in the same folder, so the first one is
    # enough to recover the folder name.
    folder = class_files[0].split('/')[-2]
    # Fall back to the raw folder name for unknown folders; appending nothing
    # would shift every later name to the wrong category index.
    name_animal.append(_FOLDER_TO_ENGLISH.get(folder, folder))
# VGG16 convolutional base with ImageNet weights, without its own classifier.
base_model = applications.VGG16(weights='imagenet', include_top=False, input_shape=(img_rows, img_cols, img_channel))

# New classification head: flatten -> 256 ReLU units -> 10-way softmax.
add_model = Sequential()
add_model.add(Flatten(input_shape=base_model.output_shape[1:]))
add_model.add(Dense(256, activation='relu'))
add_model.add(Dense(10, activation='softmax'))

model = Model(inputs=base_model.input, outputs=add_model(base_model.output))
# The labels are one-hot vectors over 10 mutually exclusive classes and the
# output layer is a softmax, so the matching loss is categorical cross-entropy.
# binary_crossentropy would score each of the 10 outputs as an independent
# yes/no problem.
model.compile(loss='categorical_crossentropy', optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
              metrics=['accuracy'])
model.summary()
batch_size = 32
epochs = 50

# Augment the training images with random rotations, shifts and flips.
train_datagen = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True)
train_datagen.fit(x_train)

# The training call must run: `history` is read below and by show_plots, and
# without it the classification head would stay untrained.
history = model.fit_generator(
    train_datagen.flow(x_train, y_train, batch_size=batch_size),
    steps_per_epoch=x_train.shape[0] // batch_size,
    epochs=epochs,
    validation_data=(x_test, y_test),
    callbacks=[ModelCheckpoint('VGG16-transferlearning.model', monitor='val_acc')]
)

# Summary of the final epoch.
# The accuracy entry is named 'acc' or 'accuracy' depending on the Keras
# version, so use whichever key is present.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
print("CNN: Epochs={0:d}, Train accuracy={1:.5f}, Validation accuracy={2:.5f}".format(epochs,history.history[acc_key][-1],history.history['val_' + acc_key][-1]))
def show_plots(history):
    """Plot loss and accuracy per epoch for training and validation.

    Args:
        history: Mapping from metric name to a list of per-epoch values. It
            must contain 'loss' and 'val_loss', plus either 'acc'/'val_acc'
            or 'accuracy'/'val_accuracy'.

    Raises:
        KeyError: If a required metric is missing from *history*.
    """
    # The accuracy entry is named 'acc' or 'accuracy' depending on the Keras
    # version that produced the history; accept both.
    acc_key = 'acc' if 'acc' in history else 'accuracy'
    loss_vals = history['loss']
    val_loss_vals = history['val_loss']
    acc_vals = history[acc_key]
    val_acc_vals = history['val_' + acc_key]
    epochs = range(1, len(acc_vals) + 1)

    f, ax = plt.subplots(nrows=1, ncols=2, figsize=(16, 4))

    # plot losses on ax[0]; training values are drawn as markers only
    ax[0].plot(epochs, loss_vals, color='navy', marker='o', linestyle=' ', label='Training Loss')
    ax[0].plot(epochs, val_loss_vals, color='firebrick', marker='*', label='Validation Loss')
    ax[0].set_title('Training & Validation Loss')
    ax[0].set_xlabel('Epochs')
    ax[0].set_ylabel('Loss')
    ax[0].legend(loc='best')
    ax[0].grid(True)

    # plot accuracies on ax[1]
    ax[1].plot(epochs, acc_vals, color='navy', marker='o', ls=' ', label='Training Accuracy')
    ax[1].plot(epochs, val_acc_vals, color='firebrick', marker='*', label='Validation Accuracy')
    ax[1].set_title('Training & Validation Accuracy')
    ax[1].set_xlabel('Epochs')
    ax[1].set_ylabel('Accuracy')
    ax[1].legend(loc='best')
    ax[1].grid(True)

    plt.show()
    plt.close()
show_plots(history.history)

# Predict the first (up to) 100 test images.
# images2 already holds the preprocessed 224x224 RGB test images, prepared
# exactly like the training images, so they are used as they are; running the
# resize/crop a second time would feed the model differently framed pictures.
num_test = min(100, len(images2))
test_images2 = np.array(images2[:num_test]).reshape(-1, 224, 224, 3)

# The model was trained on pixel values scaled to 0..1, so apply the same
# scaling for prediction. The uint8 copy is kept for display.
something = model.predict(test_images2.astype('float32') / 255)

animals = name_animal
for i, pred in enumerate(something):
    plt.imshow(test_images2[i])
    plt.show()
    # The test folder carries no ground-truth label, so show the file path.
    print('Actual :', test_df.filename.values[i])
    print('Predict :', animals[int(np.argmax(pred))])
    print('Values : ', pred.max())
결과는 다음과 같습니다.
사람 사진을 넣으면 이런 식으로 나온답니다.
데이터셋과 테스트셋은 Kaggle 홈페이지에서 구할 수 있으니 한번 살펴보시길 바랍니다.
'목 - 코딩&상품리뷰 다모여! > 코딩' 카테고리의 다른 글
[OpenGL]03. 그래픽 컬러 처리 (0) | 2020.06.05 |
---|---|
[OpenGL]02. 그래픽 하드웨어 (0) | 2020.05.29 |
01.Open GL 시작 및 연습문제 풀이 (0) | 2020.05.22 |
2020_03_21기준 Doit R 데이터 분석 고쳐야할점 KoNLP패키지 설치오류 (0) | 2020.05.15 |
conda 오류 : failed with initial frozen solve. Retrying with flexible solve. 해결법 (5) | 2020.04.29 |