clustering.py
1.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
"""
Cluster sound data using the VGG16 model and K-Means clustering, then classify it.
"""
import os, shutil, glob, os.path

import numpy as np
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input
from keras.preprocessing import image
from PIL import Image as pil_image
from PIL import ImageFile
from sklearn.cluster import KMeans
# Let Pillow decode truncated image files instead of raising an error.
# The switch lives on PIL.ImageFile; assigning it on the Keras `image`
# module (as this script used to) only creates an attribute nobody reads.
ImageFile.LOAD_TRUNCATED_IMAGES = True

# VGG16 with ImageNet weights; include_top=False drops the fully connected
# classifier layers, so the model is used purely as a feature extractor.
model = VGG16(weights='imagenet', include_top=False)

# imdir: directory scanned for the input *.png images.
# targetdir: directory that receives the copies named after their cluster.
# Both are placeholders that must be edited before running the script.
imdir = 'PATH WHERE YOUR FILE IS'
targetdir = 'PATH WHERE YOUT FILE SAVE'

# Number of clusters requested from K-Means.
number_clusters = 2
# Loop over the input images and collect one flattened VGG16 feature vector
# per file. The list is sorted because glob returns files in arbitrary order
# and the position in `filelist` ends up in the output file names.
filelist = sorted(glob.glob(os.path.join(imdir, '*.png')))
if not filelist:
    # Fail early with a readable message instead of an opaque error from
    # K-Means when it is handed an empty feature matrix.
    raise SystemExit("No .png files found in %r" % imdir)

featurelist = []
for i, imagepath in enumerate(filelist):
    # "\r" keeps the status on a single console line.
    print(" Status: %s / %s" %(i, len(filelist)), end="\r")
    # Every image is resized to the same target size so that all flattened
    # feature vectors have the same length.
    img = image.load_img(imagepath, target_size=(370, 470))
    img_data = image.img_to_array(img)
    # Add a leading batch axis: the model predicts on batches of images.
    img_data = np.expand_dims(img_data, axis=0)
    img_data = preprocess_input(img_data)
    features = np.array(model.predict(img_data))
    featurelist.append(features.flatten())
    # Progress marker every 10th image (the previous `if i % 10:` fired on
    # every index that was NOT a multiple of 10).
    if i % 10 == 0:
        print(i)
# K-Means clustering: stack the per-image feature vectors into one matrix
# and fit the estimator on it. random_state is fixed so repeated runs on the
# same features give the same labels.
feature_matrix = np.array(featurelist)
kmeans = KMeans(n_clusters=number_clusters, random_state=0)
kmeans.fit(feature_matrix)
# Copy every input file into targetdir as "<cluster label>_<index>.jpg".
print("\n")
# Create the destination up front so the first copy cannot fail on a
# missing directory.
os.makedirs(targetdir, exist_ok=True)
total = len(kmeans.labels_)
for i, m in enumerate(kmeans.labels_):
    print(" Copy: %s / %s" %(i, total), end="\r")
    # os.path.join inserts the path separator; plain string concatenation
    # wrote the files next to targetdir (with its name fused into the file
    # name) whenever targetdir had no trailing separator.
    # NOTE(review): the sources are globbed as *.png but the copies are named
    # .jpg; the suffix is kept so existing output names do not change --
    # confirm whether downstream code relies on it.
    destination = os.path.join(targetdir, str(m) + "_" + str(i) + ".jpg")
    shutil.copy(filelist[i], destination)
    if(i % 100) == 0:
        print(str(i)+'is finished')