이민호

upload report, codes

Showing 156 changed files with 5303 additions and 0 deletions
*.pyc
__pycache__/
/data/
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
from functools import partial
from multiprocessing import Pool
import os
import re
import cropper
import numpy as np
import tqdm
# ==============================================================================
# = param =
# ==============================================================================
parser = argparse.ArgumentParser()
# main
parser.add_argument('--img_dir', dest='img_dir', default='./data/img_celeba')
parser.add_argument('--save_dir', dest='save_dir', default='./data/aligned')
parser.add_argument('--landmark_file', dest='landmark_file', default='./data/landmark.txt')
parser.add_argument('--standard_landmark_file', dest='standard_landmark_file', default='./data/standard_landmark_68pts.txt')
parser.add_argument('--crop_size_h', dest='crop_size_h', type=int, default=572)
parser.add_argument('--crop_size_w', dest='crop_size_w', type=int, default=572)
parser.add_argument('--move_h', dest='move_h', type=float, default=0.25)
parser.add_argument('--move_w', dest='move_w', type=float, default=0.)
parser.add_argument('--save_format', dest='save_format', choices=['jpg', 'png'], default='jpg')
parser.add_argument('--n_worker', dest='n_worker', type=int, default=8)
# others
parser.add_argument('--face_factor', dest='face_factor', type=float, help='The factor of face area relative to the output image.', default=0.45)
parser.add_argument('--align_type', dest='align_type', choices=['affine', 'similarity'], default='similarity')
parser.add_argument('--order', dest='order', type=int, choices=[0, 1, 2, 3, 4, 5], help='The order of interpolation.', default=3)
parser.add_argument('--mode', dest='mode', choices=['constant', 'edge', 'symmetric', 'reflect', 'wrap'], default='edge')
args = parser.parse_args()
# ==============================================================================
# = opencv first =
# ==============================================================================
_DEFAULT_JPG_QUALITY = 95
try:
import cv2
imread = cv2.imread
imwrite = partial(cv2.imwrite, params=[int(cv2.IMWRITE_JPEG_QUALITY), _DEFAULT_JPG_QUALITY])
align_crop = cropper.align_crop_opencv
print('Use OpenCV')
except ImportError:
import skimage.io as io
imread = io.imread
imwrite = partial(io.imsave, quality=_DEFAULT_JPG_QUALITY)
align_crop = cropper.align_crop_skimage
print('Importing OpenCV failed; using scikit-image instead.')
# ==============================================================================
# = run =
# ==============================================================================
# count landmarks
with open(args.landmark_file) as f:
line = f.readline()
n_landmark = len(re.split('[ ]+', line)[1:]) // 2
# load standard landmark
standard_landmark = np.genfromtxt(args.standard_landmark_file, dtype=float).reshape(n_landmark, 2)
standard_landmark[:, 0] += args.move_w
standard_landmark[:, 1] += args.move_h
# data dir
save_dir = os.path.join(args.save_dir, 'align_size(%d,%d)_move(%.3f,%.3f)_face_factor(%.3f)_%s' % (args.crop_size_h, args.crop_size_w, args.move_h, args.move_w, args.face_factor, args.save_format))
data_dir = os.path.join(save_dir, 'data')
if not os.path.isdir(data_dir):
os.makedirs(data_dir)
def work(name, landmark) -> str: # process a single image
for _ in range(3): # try three times
try:
img = imread(os.path.join(args.img_dir, name))
img_crop, tformed_landmarks = align_crop(img,
landmark,
standard_landmark,
crop_size=(args.crop_size_h, args.crop_size_w),
face_factor=args.face_factor,
align_type=args.align_type,
order=args.order,
mode=args.mode)
name = os.path.splitext(name)[0] + '.' + args.save_format
path = os.path.join(data_dir, name)
if not os.path.isdir(os.path.split(path)[0]):
os.makedirs(os.path.split(path)[0])
imwrite(path, img_crop)
tformed_landmarks.shape = -1
name_landmark_str = ('%s' + ' %.1f' * n_landmark * 2) % ((name, ) + tuple(tformed_landmarks))
return name_landmark_str
except:
print('%s failed!' % name)
if __name__ == "__main__":
img_names = np.genfromtxt(args.landmark_file, dtype=str, usecols=0)
landmarks = np.genfromtxt(args.landmark_file, dtype=float,
usecols=range(1, n_landmark * 2 + 1)).reshape(-1, n_landmark, 2)
n_pics = len(img_names)
landmarks_path = os.path.join(save_dir, 'landmark.txt')
f = open(landmarks_path, 'w')
pool = Pool(args.n_worker)
bar = tqdm.tqdm(total=n_pics)
tasks = []
for i in range(n_pics):
tasks.append(pool.apply_async(work, (img_names[i], landmarks[i]), callback=lambda _: bar.update()))
try:
result = tasks.pop(0).get()
if result is not None and result != "":
f.write(result + '\n')
except:
pass
pool.close()
pool.join()
bar.close()
f.close()
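# Example invocation (illustrative; assuming this script is saved as align.py):
#   python align.py --img_dir ./data/img_celeba --landmark_file ./data/landmark.txt --crop_size_h 572 --crop_size_w 572 --n_worker 8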
import numpy as np
def align_crop_opencv(img,
src_landmarks,
standard_landmarks,
crop_size=512,
face_factor=0.7,
align_type='similarity',
order=3,
mode='edge'):
"""Align and crop a face image by landmarks.
Arguments:
img : Face image to be aligned and cropped.
src_landmarks : [[x_1, y_1], ..., [x_n, y_n]].
standard_landmarks : Standard shape, should be normalized.
crop_size : Output image size, should be 1. int for (crop_size, crop_size)
or 2. (int, int) for (crop_size_h, crop_size_w).
face_factor : The factor of face area relative to the output image.
align_type : 'similarity' or 'affine'.
order : The order of interpolation. The order has to be in the range 0-5:
- 0: INTER_NEAREST
- 1: INTER_LINEAR
- 2: INTER_AREA
- 3: INTER_CUBIC
- 4: INTER_LANCZOS4
- 5: INTER_LANCZOS4
mode : One of ['constant', 'edge', 'symmetric', 'reflect', 'wrap'].
Points outside the boundaries of the input are filled according
to the given mode.
"""
# set OpenCV
import cv2
inter = {0: cv2.INTER_NEAREST, 1: cv2.INTER_LINEAR, 2: cv2.INTER_AREA,
3: cv2.INTER_CUBIC, 4: cv2.INTER_LANCZOS4, 5: cv2.INTER_LANCZOS4}
border = {'constant': cv2.BORDER_CONSTANT, 'edge': cv2.BORDER_REPLICATE,
'symmetric': cv2.BORDER_REFLECT, 'reflect': cv2.BORDER_REFLECT101,
'wrap': cv2.BORDER_WRAP}
# check
assert align_type in ['affine', 'similarity'], 'Invalid `align_type`! Allowed: %s!' % ['affine', 'similarity']
assert order in [0, 1, 2, 3, 4, 5], 'Invalid `order`! Allowed: %s!' % [0, 1, 2, 3, 4, 5]
assert mode in ['constant', 'edge', 'symmetric', 'reflect', 'wrap'], 'Invalid `mode`! Allowed: %s!' % ['constant', 'edge', 'symmetric', 'reflect', 'wrap']
# crop size
if isinstance(crop_size, (list, tuple)) and len(crop_size) == 2:
crop_size_h = crop_size[0]
crop_size_w = crop_size[1]
elif isinstance(crop_size, int):
crop_size_h = crop_size_w = crop_size
else:
raise Exception('Invalid `crop_size`! `crop_size` should be 1. int for (crop_size, crop_size) or 2. (int, int) for (crop_size_h, crop_size_w)!')
# estimate transform matrix
trg_landmarks = standard_landmarks * max(crop_size_h, crop_size_w) * face_factor + np.array([crop_size_w // 2, crop_size_h // 2])
if align_type == 'affine':
tform = cv2.estimateAffine2D(trg_landmarks, src_landmarks, ransacReprojThreshold=np.inf)[0]
else:
tform = cv2.estimateAffinePartial2D(trg_landmarks, src_landmarks, ransacReprojThreshold=np.inf)[0]
# warp image by given transform
output_shape = (crop_size_h, crop_size_w)
img_crop = cv2.warpAffine(img, tform, output_shape[::-1], flags=cv2.WARP_INVERSE_MAP + inter[order], borderMode=border[mode])
# get transformed landmarks
tformed_landmarks = cv2.transform(np.expand_dims(src_landmarks, axis=0), cv2.invertAffineTransform(tform))[0]
return img_crop, tformed_landmarks
def align_crop_skimage(img,
src_landmarks,
standard_landmarks,
crop_size=512,
face_factor=0.7,
align_type='similarity',
order=3,
mode='edge'):
"""Align and crop a face image by landmarks.
Arguments:
img : Face image to be aligned and cropped.
src_landmarks : [[x_1, y_1], ..., [x_n, y_n]].
standard_landmarks : Standard shape, should be normalized.
crop_size : Output image size, should be 1. int for (crop_size, crop_size)
or 2. (int, int) for (crop_size_h, crop_size_w).
face_factor : The factor of face area relative to the output image.
align_type : 'similarity' or 'affine'.
order : The order of interpolation. The order has to be in the range 0-5:
- 0: INTER_NEAREST
- 1: INTER_LINEAR
- 2: INTER_AREA
- 3: INTER_CUBIC
- 4: INTER_LANCZOS4
- 5: INTER_LANCZOS4
mode : One of ['constant', 'edge', 'symmetric', 'reflect', 'wrap'].
Points outside the boundaries of the input are filled according
to the given mode.
"""
raise NotImplementedError("'align_crop_skimage' is not implemented!")
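# 'align_crop_skimage' above is left unimplemented and only raises. As a rough,
# untested sketch (not part of the original code), a scikit-image based fallback
# could look like the hypothetical helper below, which mirrors the OpenCV version
# using skimage.transform.
def _align_crop_skimage_sketch(img, src_landmarks, standard_landmarks, crop_size=512,
                               face_factor=0.7, align_type='similarity', order=3, mode='edge'):
    from skimage import transform as sktf
    if isinstance(crop_size, (list, tuple)) and len(crop_size) == 2:
        crop_size_h, crop_size_w = crop_size
    else:
        crop_size_h = crop_size_w = crop_size
    trg_landmarks = standard_landmarks * max(crop_size_h, crop_size_w) * face_factor + \
        np.array([crop_size_w // 2, crop_size_h // 2])
    ttype = 'affine' if align_type == 'affine' else 'similarity'
    # skimage's warp() expects a map from output coordinates to input coordinates,
    # so estimate the transform from the target (crop) landmarks to the source landmarks.
    inv_tform = sktf.estimate_transform(ttype, trg_landmarks, src_landmarks)
    img_crop = sktf.warp(img, inv_tform, output_shape=(crop_size_h, crop_size_w),
                         order=order, mode=mode, preserve_range=True).astype(img.dtype)
    # Map the source landmarks into the crop frame with the exact inverse matrix.
    fwd = np.linalg.inv(inv_tform.params)
    pts = np.concatenate([src_landmarks, np.ones((len(src_landmarks), 1))], axis=1)
    tformed_landmarks = (pts @ fwd.T)[:, :2]
    return img_crop, tformed_landmarks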
# Auto detect text files and perform LF normalization
* text=auto
*.pyc
docs
data
lfw
lfw_40
.idea
loss
vgg_face_dataset
saved_network
loss
z_detect_face.py
z_main.py
*.npy
*.Lnk
data1
data1_masked
scratch.py
subset
subset_masked
vgg_face_dataset
*.mp4
ML_examples
*.pptx
datasets
*.dat
*.docx
theme: jekyll-theme-cayman
# Author: aqeelanwar
# Created: 27 April,2020, 10:22 PM
# Email: aqeel.anwar@gatech.edu
import argparse
import dlib
from utils.aux_functions import *
# Command-line input setup
parser = argparse.ArgumentParser(
description="MaskTheFace - Python code to mask faces dataset"
)
parser.add_argument(
"--path",
type=str,
default="",
help="Path to either the folder containing images or the image itself",
)
parser.add_argument(
"--mask_type",
type=str,
default="surgical",
choices=["surgical", "N95", "KN95", "cloth", "gas", "inpaint", "random", "all"],
help="Type of the mask to be applied. Available options: all, surgical_blue, surgical_green, N95, cloth",
)
parser.add_argument(
"--pattern",
type=str,
default="",
help="Type of the pattern. Available options in masks/textures",
)
parser.add_argument(
"--pattern_weight",
type=float,
default=0.5,
help="Weight of the pattern. Must be between 0 and 1",
)
parser.add_argument(
"--color",
type=str,
default="#0473e2",
help="Hex color value that need to be overlayed to the mask",
)
parser.add_argument(
"--color_weight",
type=float,
default=0.5,
help="Weight of the color intensity. Must be between 0 and 1",
)
parser.add_argument(
"--code",
type=str,
# default="cloth-masks/textures/check/check_4.jpg, cloth-#e54294, cloth-#ff0000, cloth, cloth-masks/textures/others/heart_1.png, cloth-masks/textures/fruits/pineapple.png, N95, surgical_blue, surgical_green",
default="",
help="Generate specific formats",
)
parser.add_argument(
"--verbose", dest="verbose", action="store_true", help="Turn verbosity on"
)
parser.add_argument(
"--write_original_image",
dest="write_original_image",
action="store_true",
help="If true, original image is also stored in the masked folder",
)
parser.set_defaults(feature=False)
args = parser.parse_args()
args.write_path = args.path + "_masked"
# Set up dlib face detector and predictor
args.detector = dlib.get_frontal_face_detector()
path_to_dlib_model = "dlib_models/shape_predictor_68_face_landmarks.dat"
if not os.path.exists(path_to_dlib_model):
download_dlib_model()
args.predictor = dlib.shape_predictor(path_to_dlib_model)
# Extract data from code
mask_code = "".join(args.code.split()).split(",")
args.code_count = np.zeros(len(mask_code))
args.mask_dict_of_dict = {}
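# Each --code entry has the form "<mask_type>", "<mask_type>-<#hex color>" or
# "<mask_type>-<texture path>"; e.g. "cloth-#e54294" is a cloth mask tinted with
# #e54294, while "cloth-masks/textures/check/check_4.jpg" is a cloth mask with a
# checked texture (see the commented default of --code above).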
for i, entry in enumerate(mask_code):
mask_dict = {}
mask_color = ""
mask_texture = ""
mask_type = entry.split("-")[0]
if len(entry.split("-")) == 2:
mask_variation = entry.split("-")[1]
if "#" in mask_variation:
mask_color = mask_variation
else:
mask_texture = mask_variation
mask_dict["type"] = mask_type
mask_dict["color"] = mask_color
mask_dict["texture"] = mask_texture
args.mask_dict_of_dict[i] = mask_dict
# Check if path is file or directory or none
is_directory, is_file, is_other = check_path(args.path)
display_MaskTheFace()
if is_directory:
path, dirs, files = os.walk(args.path).__next__()
file_count = len(files)
dirs_count = len(dirs)
if len(files) > 0:
print_orderly("Masking image files", 60)
# Process files in the directory if any
for f in tqdm(files):
image_path = path + "/" + f
write_path = path + "_masked"
if not os.path.isdir(write_path):
os.makedirs(write_path)
if is_image(image_path):
# Proceed if file is image
if args.verbose:
str_p = "Processing: " + image_path
tqdm.write(str_p)
split_path = f.rsplit(".")
masked_image, mask, mask_binary_array, original_image = mask_image(
image_path, args
)
for i in range(len(mask)):
w_path = (
write_path
+ "/"
+ split_path[0]
+ "_"
+ "masked"
+ "."
+ split_path[1]
)
img = masked_image[i]
binary_img = mask_binary_array[i]
cv2.imwrite(w_path, img)
cv2.imwrite(
path + "_binary/" + split_path[0] + "_binary" + "." + split_path[1],
binary_img,
)
cv2.imwrite(
path + "_original/" + split_path[0] + "." + split_path[1],
original_image,
)
print_orderly("Masking image directories", 60)
# Process directories within the provided path
for d in tqdm(dirs):
dir_path = args.path + "/" + d
dir_write_path = args.write_path + "/" + d
if not os.path.isdir(dir_write_path):
os.makedirs(dir_write_path)
_, _, files = os.walk(dir_path).__next__()
# Process each file within the subdirectory
for f in files:
image_path = dir_path + "/" + f
if args.verbose:
str_p = "Processing: " + image_path
tqdm.write(str_p)
write_path = dir_write_path
if is_image(image_path):
# Proceed if file is image
split_path = f.rsplit(".")
masked_image, mask, mask_binary, original_image = mask_image(
image_path, args
)
for i in range(len(mask)):
w_path = (
write_path
+ "/"
+ split_path[0]
+ "_"
+ "masked"
+ "."
+ split_path[1]
)
w_path_original = write_path + "/" + f
img = masked_image[i]
binary_img = mask_binary[i]
cv2.imwrite(
path
+ "_binary/"
+ split_path[0]
+ "_binary"
+ "."
+ split_path[1],
binary_img,
)
# Write the masked image
cv2.imwrite(w_path, img)
if args.write_original_image:
# Write the original image
cv2.imwrite(w_path_original, original_image)
if args.verbose:
print(args.code_count)
# Process if the path was a file
elif is_file:
print("Masking image file")
image_path = args.path
write_path = args.path.rsplit(".")[0]
if is_image(image_path):
# Proceed if file is image
# masked_images, mask, mask_binary_array, original_image
masked_image, mask, mask_binary_array, original_image = mask_image(
image_path, args
)
for i in range(len(mask)):
w_path = write_path + "_" + "masked" + "." + args.path.rsplit(".")[1]
img = masked_image[i]
binary_img = mask_binary_array[i]
cv2.imwrite(w_path, img)
cv2.imwrite(write_path + "_binary." + args.path.rsplit(".")[1], binary_img)
else:
print("Path is neither a valid file or a valid directory")
print("Processing Done")
[surgical]
template: masks/templates/surgical.png
mask_a: 21, 97
mask_b: 307, 22
mask_c: 600, 99
mask_d: 25, 322
mask_e: 295, 470
mask_f: 600, 323
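; Each section in this file names a mask template image and six (x, y) pixel
; anchor points (mask_a .. mask_f) on that template. mask_face() in
; utils/aux_functions.py warps the template onto the face by estimating a
; homography from these six points to the six face points from get_six_points().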
[surgical_left]
template: masks/templates/surgical_left.png
mask_a: 39, 27
mask_b: 130, 9
mask_c: 567, 20
mask_d: 87, 207
mask_e: 168, 302
mask_f: 568, 202
[surgical_right]
template: masks/templates/surgical_right.png
mask_a: 3, 20
mask_b: 440, 9
mask_c: 531, 27
mask_d: 2, 202
mask_e: 402, 302
mask_f: 483, 207
[surgical_green]
template: masks/templates/surgical_green.png
mask_a: 21, 97
mask_b: 307, 22
mask_c: 600, 99
mask_d: 25, 322
mask_e: 295, 470
mask_f: 600, 323
[surgical_green_left]
template: masks/templates/surgical_green_left.png
mask_a: 39, 27
mask_b: 130, 9
mask_c: 567, 20
mask_d: 87, 207
mask_e: 168, 302
mask_f: 568, 202
[surgical_green_right]
template: masks/templates/surgical_green_right.png
mask_a: 3, 20
mask_b: 440, 9
mask_c: 531, 27
mask_d: 2, 202
mask_e: 402, 302
mask_f: 483, 207
[surgical_blue]
template: masks/templates/surgical_blue.png
mask_a: 21, 97
mask_b: 307, 22
mask_c: 600, 99
mask_d: 25, 322
mask_e: 295, 470
mask_f: 600, 323
[surgical_blue_left]
template: masks/templates/surgical_blue_left.png
mask_a: 39, 27
mask_b: 130, 9
mask_c: 567, 20
mask_d: 87, 207
mask_e: 168, 302
mask_f: 568, 202
[surgical_blue_right]
template: masks/templates/surgical_blue_right.png
mask_a: 3, 20
mask_b: 440, 9
mask_c: 531, 27
mask_d: 2, 202
mask_e: 402, 302
mask_f: 483, 207
[N95]
template: masks/templates/N95.png
mask_a: 15, 119
mask_b: 327, 5
mask_c: 640, 93
mask_d: 13, 285
mask_e: 351, 518
mask_f: 645, 285
;[N95_left]
;template: masks/N95_left.png
;mask_a: 176, 121
;mask_b: 313, 46
;mask_c: 799, 135
;mask_d: 97, 438
;mask_e: 329, 627
;mask_f: 791, 401
[N95_right]
template: masks/templates/N95_right.png
mask_c: 979, 331
mask_b: 806, 172
mask_a: 12, 222
mask_f: 907, 762
mask_e: 577, 875
mask_d: -4, 632
[N95_left]
template: masks/templates/N95_left.png
mask_a: 193, 331
mask_b: 366, 172
mask_c: 1160, 222
mask_d: 265, 762
mask_e: 595, 875
mask_f: 1176, 632
[cloth_left]
template: masks/templates/cloth_left.png
mask_a: 65, 93
mask_b: 162, 15
mask_c: 672, 75
mask_d: 114, 296
mask_e: 207, 443
mask_f: 671, 341
[cloth_right]
template: masks/templates/cloth_right.png
mask_a: 98, 93
mask_b: 608, 15
mask_c: 705, 75
mask_d: 99, 296
mask_e: 563, 443
mask_f: 656, 341
[cloth]
template: masks/templates/cloth.png
mask_a: 122, 90
mask_b: 405, 7
mask_c: 686, 79
mask_d: 165, 323
mask_e: 406, 509
mask_f: 653, 311
[gas]
template: masks/templates/gas.png
mask_a: 330, 431
mask_b: 873, 117
mask_c: 1494, 434
mask_d: 430, 754
mask_e: 869, 1100
mask_f: 1400, 710
[gas_left]
template: masks/templates/gas_left.png
mask_a: 239, 238
mask_b: 317, 42
mask_c: 965, 239
mask_d: 224, 404
mask_e: 337, 502
mask_f: 963, 406
[gas_right]
template: masks/templates/gas_right.png
mask_c: 621, 238
mask_b: 543, 60
mask_a: -105, 239
mask_f: 636, 404
mask_e: 523, 502
mask_d: -103, 406
[KN95]
template: masks/templates/KN95.png
mask_a: 20, 47
mask_b: 410, 5
mask_c: 760, 55
mask_d: 75, 340
mask_e: 398, 600
mask_f: 671, 320
[KN95_left]
template: masks/templates/KN95_left.png
mask_a: 52, 258
mask_b: 207, 100
mask_c: 730, 80
mask_d: 210, 408
mask_e: 335, 604
mask_f: 770, 270
[KN95_right]
template: masks/templates/KN95_right.png
mask_c: 664, 258
mask_b: 509, 100
mask_a: -14, 80
mask_f: 506, 408
mask_e: 381, 604
mask_d: -54, 270
[empty]
[empty_left]
[empty_right]
[inpaint]
[inpaint_left]
[inpaint_right]
certifi==2020.4.5.1
click==7.1.2
dlib==19.19.0
dotmap==1.3.14
face-recognition==1.3.0
face-recognition-models==0.3.0
numpy==1.18.4
opencv-python==4.2.0.34
Pillow==7.1.2
tqdm==4.46.0
wincertstore==0.2
imutils==0.5.3
requests==2.24.0
# Author: Aqeel Anwar(ICSRL)
# Created: 7/30/2020, 7:43 AM
# Email: aqeel.anwar@gatech.edu
# Author: aqeelanwar
# Created: 27 April,2020, 10:21 PM
# Email: aqeel.anwar@gatech.edu
from configparser import ConfigParser
import cv2, math, os
from PIL import Image, ImageDraw
from tqdm import tqdm
from utils.read_cfg import read_cfg
from utils.fit_ellipse import *
import random
from utils.create_mask import texture_the_mask, color_the_mask
from imutils import face_utils
import requests
from zipfile import ZipFile
from tqdm import tqdm
import bz2, shutil
def download_dlib_model():
print_orderly("Get dlib model", 60)
dlib_model_link = "http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2"
print("Downloading dlib model...")
with requests.get(dlib_model_link, stream=True) as r:
print("Zip file size: ", np.round(len(r.content) / 1024 / 1024, 2), "MB")
destination = (
"dlib_models" + os.path.sep + "shape_predictor_68_face_landmarks.dat.bz2"
)
if not os.path.exists(destination.rsplit(os.path.sep, 1)[0]):
os.mkdir(destination.rsplit(os.path.sep, 1)[0])
print("Saving dlib model...")
with open(destination, "wb") as fd:
for chunk in r.iter_content(chunk_size=32678):
fd.write(chunk)
print("Extracting dlib model...")
with bz2.BZ2File(destination) as fr, open(
"dlib_models/shape_predictor_68_face_landmarks.dat", "wb"
) as fw:
shutil.copyfileobj(fr, fw)
print("Saved: ", destination)
print_orderly("done", 60)
os.remove(destination)
def get_line(face_landmark, image, type="eye", debug=False):
pil_image = Image.fromarray(image)
d = ImageDraw.Draw(pil_image)
left_eye = face_landmark["left_eye"]
right_eye = face_landmark["right_eye"]
left_eye_mid = np.mean(np.array(left_eye), axis=0)
right_eye_mid = np.mean(np.array(right_eye), axis=0)
eye_line_mid = (left_eye_mid + right_eye_mid) / 2
if type == "eye":
left_point = left_eye_mid
right_point = right_eye_mid
mid_point = eye_line_mid
elif type == "nose_mid":
nose_length = (
face_landmark["nose_bridge"][-1][1] - face_landmark["nose_bridge"][0][1]
)
left_point = [left_eye_mid[0], left_eye_mid[1] + nose_length / 2]
right_point = [right_eye_mid[0], right_eye_mid[1] + nose_length / 2]
# mid_point = (
# face_landmark["nose_bridge"][-1][1] + face_landmark["nose_bridge"][0][1]
# ) / 2
mid_pointY = (
face_landmark["nose_bridge"][-1][1] + face_landmark["nose_bridge"][0][1]
) / 2
mid_pointX = (
face_landmark["nose_bridge"][-1][0] + face_landmark["nose_bridge"][0][0]
) / 2
mid_point = (mid_pointX, mid_pointY)
elif type == "nose_tip":
nose_length = (
face_landmark["nose_bridge"][-1][1] - face_landmark["nose_bridge"][0][1]
)
left_point = [left_eye_mid[0], left_eye_mid[1] + nose_length]
right_point = [right_eye_mid[0], right_eye_mid[1] + nose_length]
mid_point = (
face_landmark["nose_bridge"][-1][1] + face_landmark["nose_bridge"][0][1]
) / 2
elif type == "bottom_lip":
bottom_lip = face_landmark["bottom_lip"]
bottom_lip_mid = np.max(np.array(bottom_lip), axis=0)
shiftY = bottom_lip_mid[1] - eye_line_mid[1]
left_point = [left_eye_mid[0], left_eye_mid[1] + shiftY]
right_point = [right_eye_mid[0], right_eye_mid[1] + shiftY]
mid_point = bottom_lip_mid
elif type == "perp_line":
bottom_lip = face_landmark["bottom_lip"]
bottom_lip_mid = np.mean(np.array(bottom_lip), axis=0)
left_point = eye_line_mid
left_point = face_landmark["nose_bridge"][0]
right_point = bottom_lip_mid
mid_point = bottom_lip_mid
elif type == "nose_long":
nose_bridge = face_landmark["nose_bridge"]
left_point = [nose_bridge[0][0], nose_bridge[0][1]]
right_point = [nose_bridge[-1][0], nose_bridge[-1][1]]
mid_point = left_point
# d.line(eye_mid, width=5, fill='red')
y = [left_point[1], right_point[1]]
x = [left_point[0], right_point[0]]
# cv2.imshow('h', image)
# cv2.waitKey(0)
eye_line = fit_line(x, y, image)
d.line(eye_line, width=5, fill="blue")
# Perpendicular Line
# (midX, midY) and (midX - y2 + y1, midY + x2 - x1)
y = [
(left_point[1] + right_point[1]) / 2,
(left_point[1] + right_point[1]) / 2 + right_point[0] - left_point[0],
]
x = [
(left_point[0] + right_point[0]) / 2,
(left_point[0] + right_point[0]) / 2 - right_point[1] + left_point[1],
]
perp_line = fit_line(x, y, image)
if debug:
d.line(perp_line, width=5, fill="red")
pil_image.show()
return eye_line, perp_line, left_point, right_point, mid_point
def get_points_on_chin(line, face_landmark, chin_type="chin"):
chin = face_landmark[chin_type]
points_on_chin = []
for i in range(len(chin) - 1):
chin_first_point = [chin[i][0], chin[i][1]]
chin_second_point = [chin[i + 1][0], chin[i + 1][1]]
flag, x, y = line_intersection(line, (chin_first_point, chin_second_point))
if flag:
points_on_chin.append((x, y))
return points_on_chin
def plot_lines(face_line, image, debug=False):
pil_image = Image.fromarray(image)
if debug:
d = ImageDraw.Draw(pil_image)
d.line(face_line, width=4, fill="white")
pil_image.show()
def line_intersection(line1, line2):
# mid = int(len(line1) / 2)
start = 0
end = -1
line1 = ([line1[start][0], line1[start][1]], [line1[end][0], line1[end][1]])
xdiff = (line1[0][0] - line1[1][0], line2[0][0] - line2[1][0])
ydiff = (line1[0][1] - line1[1][1], line2[0][1] - line2[1][1])
x = []
y = []
flag = False
def det(a, b):
return a[0] * b[1] - a[1] * b[0]
div = det(xdiff, ydiff)
if div == 0:
return flag, x, y
d = (det(*line1), det(*line2))
x = det(d, xdiff) / div
y = det(d, ydiff) / div
segment_minX = min(line2[0][0], line2[1][0])
segment_maxX = max(line2[0][0], line2[1][0])
segment_minY = min(line2[0][1], line2[1][1])
segment_maxY = max(line2[0][1], line2[1][1])
if (
segment_maxX + 1 >= x >= segment_minX - 1
and segment_maxY + 1 >= y >= segment_minY - 1
):
flag = True
return flag, x, y
def fit_line(x, y, image):
if x[0] == x[1]:
x[0] += 0.1
coefficients = np.polyfit(x, y, 1)
polynomial = np.poly1d(coefficients)
x_axis = np.linspace(0, image.shape[1], 50)
y_axis = polynomial(x_axis)
eye_line = []
for i in range(len(x_axis)):
eye_line.append((x_axis[i], y_axis[i]))
return eye_line
def get_six_points(face_landmark, image):
_, perp_line1, _, _, m = get_line(face_landmark, image, type="nose_mid")
face_b = m
perp_line, _, _, _, _ = get_line(face_landmark, image, type="perp_line")
points1 = get_points_on_chin(perp_line1, face_landmark)
points = get_points_on_chin(perp_line, face_landmark)
if not points1:
face_e = tuple(np.asarray(points[0]))
elif not points:
face_e = tuple(np.asarray(points1[0]))
else:
face_e = tuple((np.asarray(points[0]) + np.asarray(points1[0])) / 2)
# face_e = points1[0]
nose_mid_line, _, _, _, _ = get_line(face_landmark, image, type="nose_long")
angle = get_angle(perp_line, nose_mid_line)
# print("angle: ", angle)
nose_mid_line, _, _, _, _ = get_line(face_landmark, image, type="nose_tip")
points = get_points_on_chin(nose_mid_line, face_landmark)
if len(points) < 2:
face_landmark = get_face_ellipse(face_landmark)
# print("extrapolating chin")
points = get_points_on_chin(
nose_mid_line, face_landmark, chin_type="chin_extrapolated"
)
if len(points) < 2:
points = []
points.append(face_landmark["chin"][0])
points.append(face_landmark["chin"][-1])
face_a = points[0]
face_c = points[-1]
# cv2.imshow('j', image)
# cv2.waitKey(0)
nose_mid_line, _, _, _, _ = get_line(face_landmark, image, type="bottom_lip")
points = get_points_on_chin(nose_mid_line, face_landmark)
face_d = points[0]
face_f = points[-1]
six_points = np.float32([face_a, face_b, face_c, face_f, face_e, face_d])
return six_points, angle
def get_angle(line1, line2):
delta_y = line1[-1][1] - line1[0][1]
delta_x = line1[-1][0] - line1[0][0]
perp_angle = math.degrees(math.atan2(delta_y, delta_x))
if delta_x < 0:
perp_angle = perp_angle + 180
if perp_angle < 0:
perp_angle += 360
if perp_angle > 180:
perp_angle -= 180
# print("perp", perp_angle)
delta_y = line2[-1][1] - line2[0][1]
delta_x = line2[-1][0] - line2[0][0]
nose_angle = math.degrees(math.atan2(delta_y, delta_x))
if delta_x < 0:
nose_angle = nose_angle + 180
if nose_angle < 0:
nose_angle += 360
if nose_angle > 180:
nose_angle -= 180
# print("nose", nose_angle)
angle = nose_angle - perp_angle
return angle
def mask_face(image, face_location, six_points, angle, args, type="surgical"):
debug = False
# Find the face angle
threshold = 13
if angle < -threshold:
type += "_right"
elif angle > threshold:
type += "_left"
face_height = face_location[2] - face_location[0]
face_width = face_location[1] - face_location[3]
# image = image_raw[
# face_location[0]-int(face_width/2): face_location[2]+int(face_width/2),
# face_location[3]-int(face_height/2): face_location[1]+int(face_height/2),
# :,
# ]
# cv2.imshow('win', image)
# cv2.waitKey(0)
# Read appropriate mask image
w = image.shape[0]
h = image.shape[1]
if not "empty" in type and not "inpaint" in type:
cfg = read_cfg(config_filename="masks/masks.cfg", mask_type=type, verbose=False)
else:
if "left" in type:
str = "surgical_blue_left"
elif "right" in type:
str = "surgical_blue_right"
else:
str = "surgical_blue"
cfg = read_cfg(config_filename="masks/masks.cfg", mask_type=str, verbose=False)
img = cv2.imread(cfg.template, cv2.IMREAD_UNCHANGED)
# Process the mask if necessary
if args.pattern:
# Apply pattern to mask
img = texture_the_mask(img, args.pattern, args.pattern_weight)
if args.color:
# Apply color to mask
img = color_the_mask(img, args.color, args.color_weight)
mask_line = np.float32(
[cfg.mask_a, cfg.mask_b, cfg.mask_c, cfg.mask_f, cfg.mask_e, cfg.mask_d]
)
# Warp the mask
M, mask = cv2.findHomography(mask_line, six_points)
dst_mask = cv2.warpPerspective(img, M, (h, w))
dst_mask_points = cv2.perspectiveTransform(mask_line.reshape(-1, 1, 2), M)
mask = dst_mask[:, :, 3]
face_height = face_location[2] - face_location[0]
face_width = face_location[1] - face_location[3]
image_face = image[
face_location[0] + int(face_height / 2) : face_location[2],
face_location[3] : face_location[1],
:,
]
image_face = image
# Adjust Brightness
mask_brightness = get_avg_brightness(img)
img_brightness = get_avg_brightness(image_face)
delta_b = 1 + (img_brightness - mask_brightness) / 255
dst_mask = change_brightness(dst_mask, delta_b)
# Adjust Saturation
mask_saturation = get_avg_saturation(img)
img_saturation = get_avg_saturation(image_face)
delta_s = 1 - (img_saturation - mask_saturation) / 255
dst_mask = change_saturation(dst_mask, delta_s)
# Apply mask
mask_inv = cv2.bitwise_not(mask)
img_bg = cv2.bitwise_and(image, image, mask=mask_inv)
img_fg = cv2.bitwise_and(dst_mask, dst_mask, mask=mask)
out_img = cv2.add(img_bg, img_fg[:, :, 0:3])
if "empty" in type or "inpaint" in type:
out_img = img_bg
# Plot key points
if "inpaint" in type:
out_img = cv2.inpaint(out_img, mask, 3, cv2.INPAINT_TELEA)
# dst_NS = cv2.inpaint(img, mask, 3, cv2.INPAINT_NS)
if debug:
for i in six_points:
cv2.circle(out_img, (i[0], i[1]), radius=4, color=(0, 0, 255), thickness=-1)
for i in dst_mask_points:
cv2.circle(
out_img, (i[0][0], i[0][1]), radius=4, color=(0, 255, 0), thickness=-1
)
return out_img, mask
def draw_landmarks(face_landmarks, image):
pil_image = Image.fromarray(image)
d = ImageDraw.Draw(pil_image)
for facial_feature in face_landmarks.keys():
d.line(face_landmarks[facial_feature], width=5, fill="white")
pil_image.show()
def get_face_ellipse(face_landmark):
chin = face_landmark["chin"]
x = []
y = []
for point in chin:
x.append(point[0])
y.append(point[1])
x = np.asarray(x)
y = np.asarray(y)
a = fitEllipse(x, y)
center = ellipse_center(a)
phi = ellipse_angle_of_rotation(a)
axes = ellipse_axis_length(a)
a, b = axes
arc = 2.2
R = np.arange(0, arc * np.pi, 0.2)
xx = center[0] + a * np.cos(R) * np.cos(phi) - b * np.sin(R) * np.sin(phi)
yy = center[1] + a * np.cos(R) * np.sin(phi) + b * np.sin(R) * np.cos(phi)
chin_extrapolated = []
for i in range(len(R)):
chin_extrapolated.append((xx[i], yy[i]))
face_landmark["chin_extrapolated"] = chin_extrapolated
return face_landmark
def get_avg_brightness(img):
img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
h, s, v = cv2.split(img_hsv)
return np.mean(v)
def get_avg_saturation(img):
img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
h, s, v = cv2.split(img_hsv)
return np.mean(s)
def change_brightness(img, value=1.0):
img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
h, s, v = cv2.split(img_hsv)
v = value * v
v[v > 255] = 255
v = np.asarray(v, dtype=np.uint8)
final_hsv = cv2.merge((h, s, v))
img = cv2.cvtColor(final_hsv, cv2.COLOR_HSV2BGR)
return img
def change_saturation(img, value=1.0):
img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
h, s, v = cv2.split(img_hsv)
s = value * s
s[s > 255] = 255
s = np.asarray(s, dtype=np.uint8)
final_hsv = cv2.merge((h, s, v))
img = cv2.cvtColor(final_hsv, cv2.COLOR_HSV2BGR)
return img
def check_path(path):
is_directory = False
is_file = False
is_other = False
if os.path.isdir(path):
is_directory = True
elif os.path.isfile(path):
is_file = True
else:
is_other = True
return is_directory, is_file, is_other
def shape_to_landmarks(shape):
face_landmarks = {}
face_landmarks["left_eyebrow"] = [
tuple(shape[17]),
tuple(shape[18]),
tuple(shape[19]),
tuple(shape[20]),
tuple(shape[21]),
]
face_landmarks["right_eyebrow"] = [
tuple(shape[22]),
tuple(shape[23]),
tuple(shape[24]),
tuple(shape[25]),
tuple(shape[26]),
]
face_landmarks["nose_bridge"] = [
tuple(shape[27]),
tuple(shape[28]),
tuple(shape[29]),
tuple(shape[30]),
]
face_landmarks["nose_tip"] = [
tuple(shape[31]),
tuple(shape[32]),
tuple(shape[33]),
tuple(shape[34]),
tuple(shape[35]),
]
face_landmarks["left_eye"] = [
tuple(shape[36]),
tuple(shape[37]),
tuple(shape[38]),
tuple(shape[39]),
tuple(shape[40]),
tuple(shape[41]),
]
face_landmarks["right_eye"] = [
tuple(shape[42]),
tuple(shape[43]),
tuple(shape[44]),
tuple(shape[45]),
tuple(shape[46]),
tuple(shape[47]),
]
face_landmarks["top_lip"] = [
tuple(shape[48]),
tuple(shape[49]),
tuple(shape[50]),
tuple(shape[51]),
tuple(shape[52]),
tuple(shape[53]),
tuple(shape[54]),
tuple(shape[60]),
tuple(shape[61]),
tuple(shape[62]),
tuple(shape[63]),
tuple(shape[64]),
]
face_landmarks["bottom_lip"] = [
tuple(shape[54]),
tuple(shape[55]),
tuple(shape[56]),
tuple(shape[57]),
tuple(shape[58]),
tuple(shape[59]),
tuple(shape[48]),
tuple(shape[64]),
tuple(shape[65]),
tuple(shape[66]),
tuple(shape[67]),
tuple(shape[60]),
]
face_landmarks["chin"] = [
tuple(shape[0]),
tuple(shape[1]),
tuple(shape[2]),
tuple(shape[3]),
tuple(shape[4]),
tuple(shape[5]),
tuple(shape[6]),
tuple(shape[7]),
tuple(shape[8]),
tuple(shape[9]),
tuple(shape[10]),
tuple(shape[11]),
tuple(shape[12]),
tuple(shape[13]),
tuple(shape[14]),
tuple(shape[15]),
tuple(shape[16]),
]
return face_landmarks
def rect_to_bb(rect):
x1 = rect.left()
x2 = rect.right()
y1 = rect.top()
y2 = rect.bottom()
return (x1, x2, y2, x1)
def mask_image(image_path, args):
# Read the image
image = cv2.imread(image_path)
original_image = image.copy()
# gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = image
face_locations = args.detector(gray, 1)
mask_type = args.mask_type
verbose = args.verbose
if args.code:
ind = random.randint(0, len(args.code_count) - 1)
mask_dict = args.mask_dict_of_dict[ind]
mask_type = mask_dict["type"]
args.color = mask_dict["color"]
args.pattern = mask_dict["texture"]
args.code_count[ind] += 1
elif mask_type == "random":
available_mask_types = get_available_mask_types()
mask_type = random.choice(available_mask_types)
if verbose:
tqdm.write("Faces found: {:2d}".format(len(face_locations)))
# Process each face in the image
masked_images = []
mask_binary_array = []
mask = []
for (i, face_location) in enumerate(face_locations):
shape = args.predictor(gray, face_location)
shape = face_utils.shape_to_np(shape)
face_landmarks = shape_to_landmarks(shape)
face_location = rect_to_bb(face_location)
# draw_landmarks(face_landmarks, image)
six_points_on_face, angle = get_six_points(face_landmarks, image)
mask = []
if mask_type != "all":
if len(masked_images) > 0:
image = masked_images.pop(0)
image, mask_binary = mask_face(
image, face_location, six_points_on_face, angle, args, type=mask_type
)
# compress to face tight
face_height = face_location[2] - face_location[0]
face_width = face_location[1] - face_location[3]
masked_images.append(image)
mask_binary_array.append(mask_binary)
mask.append(mask_type)
else:
available_mask_types = get_available_mask_types()
for m in range(len(available_mask_types)):
if len(masked_images) == len(available_mask_types):
image = masked_images.pop(m)
img, mask_binary = mask_face(
image,
face_location,
six_points_on_face,
angle,
args,
type=available_mask_types[m],
)
masked_images.insert(m, img)
mask_binary_array.insert(m, mask_binary)
mask = available_mask_types
cc = 1
return masked_images, mask, mask_binary_array, original_image
def is_image(path):
try:
extensions = path[-4:]
image_extensions = ["png", "PNG", "jpg", "JPG"]
if extensions[1:] in image_extensions:
return True
else:
print("Please input image file. png / jpg")
return False
except:
return False
def get_available_mask_types(config_filename="masks/masks.cfg"):
parser = ConfigParser()
parser.optionxform = str
parser.read(config_filename)
available_mask_types = parser.sections()
available_mask_types = [
string for string in available_mask_types if "left" not in string
]
available_mask_types = [
string for string in available_mask_types if "right" not in string
]
return available_mask_types
def print_orderly(str, n):
# print("")
hyphens = "-" * int((n - len(str)) / 2)
str_p = hyphens + " " + str + " " + hyphens
hyphens_bar = "-" * len(str_p)
print(hyphens_bar)
print(str_p)
print(hyphens_bar)
def display_MaskTheFace():
with open("utils/display.txt", "r") as file:
for line in file:
cc = 1
print(line, end="")
# Author: aqeelanwar
# Created: 6 July,2020, 12:14 AM
# Email: aqeel.anwar@gatech.edu
from PIL import ImageColor
import cv2
import numpy as np
COLOR = [
"#fc1c1a",
"#177ABC",
"#94B6D2",
"#A5AB81",
"#DD8047",
"#6b425e",
"#e26d5a",
"#c92c48",
"#6a506d",
"#ffc900",
"#ffffff",
"#000000",
"#49ff00",
]
def color_the_mask(mask_image, color, intensity):
assert 0 <= intensity <= 1, "intensity should be between 0 and 1"
RGB_color = ImageColor.getcolor(color, "RGB")
RGB_color = (RGB_color[2], RGB_color[1], RGB_color[0])
orig_shape = mask_image.shape
bit_mask = mask_image[:, :, 3]
mask_image = mask_image[:, :, 0:3]
color_image = np.full(mask_image.shape, RGB_color, np.uint8)
mask_color = cv2.addWeighted(mask_image, 1 - intensity, color_image, intensity, 0)
mask_color = cv2.bitwise_and(mask_color, mask_color, mask=bit_mask)
colored_mask = np.zeros(orig_shape, dtype=np.uint8)
colored_mask[:, :, 0:3] = mask_color
colored_mask[:, :, 3] = bit_mask
return colored_mask
def texture_the_mask(mask_image, texture_path, intensity):
assert 0 <= intensity <= 1, "intensity should be between 0 and 1"
orig_shape = mask_image.shape
bit_mask = mask_image[:, :, 3]
mask_image = mask_image[:, :, 0:3]
texture_image = cv2.imread(texture_path)
texture_image = cv2.resize(texture_image, (orig_shape[1], orig_shape[0]))
mask_texture = cv2.addWeighted(
mask_image, 1 - intensity, texture_image, intensity, 0
)
mask_texture = cv2.bitwise_and(mask_texture, mask_texture, mask=bit_mask)
textured_mask = np.zeros(orig_shape, dtype=np.uint8)
textured_mask[:, :, 0:3] = mask_texture
textured_mask[:, :, 3] = bit_mask
return textured_mask
# cloth_mask = cv2.imread("masks/templates/cloth.png", cv2.IMREAD_UNCHANGED)
# # cloth_mask = color_the_mask(cloth_mask, color=COLOR[0], intensity=0.5)
# path = "masks/textures"
# path, dir, files = os.walk(path).__next__()
# first_frame = True
# col_limit = 6
# i = 0
# # img_concat_row=[]
# img_concat = []
# # for f in files:
# # if "._" not in f:
# # print(f)
# # i += 1
# # texture_image = cv2.imread(os.path.join(path, f))
# # m = texture_the_mask(cloth_mask, texture_image, intensity=0.5)
# # if first_frame:
# # img_concat_row = m
# # first_frame = False
# # else:
# # img_concat_row = cv2.hconcat((img_concat_row, m))
# #
# # if i % col_limit == 0:
# # if len(img_concat) > 0:
# # img_concat = cv2.vconcat((img_concat, img_concat_row))
# # else:
# # img_concat = img_concat_row
# # first_frame = True
#
# ## COlor the mask
# thresholds = np.arange(0.1,0.9,0.05)
# for intensity in thresholds:
# c=COLOR[2]
# # intensity = 0.5
# if "._" not in c:
# print(intensity)
# i += 1
# # texture_image = cv2.imread(os.path.join(path, f))
# m = color_the_mask(cloth_mask, c, intensity=intensity)
# if first_frame:
# img_concat_row = m
# first_frame = False
# else:
# img_concat_row = cv2.hconcat((img_concat_row, m))
#
# if i % col_limit == 0:
# if len(img_concat) > 0:
# img_concat = cv2.vconcat((img_concat, img_concat_row))
# else:
# img_concat = img_concat_row
# first_frame = True
#
#
# cv2.imshow("k", img_concat)
# cv2.imwrite("combine_N95_left.png", img_concat)
# cv2.waitKey(0)
# cc = 1
__ __ _ _______ _ ______
| \/ | | |__ __| | | ____|
| \ / | __ _ ___| | _| | | |__ ___| |__ __ _ ___ ___
| |\/| |/ _` / __| |/ / | | '_ \ / _ \ __/ _` |/ __/ _ \
| | | | (_| \__ \ <| | | | | | __/ | | (_| | (_| __/
|_| |_|\__,_|___/_|\_\_| |_| |_|\___|_| \__,_|\___\___|
# Author: Aqeel Anwar(ICSRL)
# Created: 7/30/2020, 1:44 PM
# Email: aqeel.anwar@gatech.edu
# Code reused from https://stackoverflow.com/questions/38511444/python-download-files-from-google-drive-using-url
# Make sure you run this from the parent folder and not from the utils folder, i.e.
# python utils/fetch_dataset.py
import requests, os
from zipfile import ZipFile
import argparse
import urllib
parser = argparse.ArgumentParser(
description="Download dataset - Python code to download associated datasets"
)
parser.add_argument(
"--dataset",
type=str,
default="mfr2",
help="Name of the dataset - Details on available datasets can be found at GitHub Page",
)
args = parser.parse_args()
def download_file_from_google_drive(id, destination):
URL = "https://docs.google.com/uc?export=download"
session = requests.Session()
response = session.get(URL, params={"id": id}, stream=True)
token = get_confirm_token(response)
if token:
params = {"id": id, "confirm": token}
response = session.get(URL, params=params, stream=True)
save_response_content(response, destination)
def get_confirm_token(response):
for key, value in response.cookies.items():
if key.startswith("download_warning"):
return value
return None
def save_response_content(response, destination):
CHUNK_SIZE = 32768
print(destination)
with open(destination, "wb") as f:
for chunk in response.iter_content(CHUNK_SIZE):
if chunk: # filter out keep-alive new chunks
f.write(chunk)
def download(t_url):
response = urllib.request.urlopen(t_url)
data = response.read()
txt_str = str(data)
lines = txt_str.split("\\n")
return lines
def Convert(lst):
it = iter(lst)
res_dct = dict(zip(it, it))
return res_dct
if __name__ == "__main__":
# Fetch the latest download_links.txt file from GitHub
link = "https://raw.githubusercontent.com/aqeelanwar/MaskTheFace/master/datasets/download_links.txt"
links_dict = Convert(
download(link)[0]
.replace(":", "\n")
.replace("b'", "")
.replace("'", "")
.replace(" ", "")
.split("\n")
)
file_id = links_dict[args.dataset]
destination = "datasets\_.zip"
print("Downloading: ", args.dataset)
download_file_from_google_drive(file_id, destination)
print("Extracting: ", args.dataset)
with ZipFile(destination, "r") as zipObj:
# Extract all the contents of zip file in current directory
zipObj.extractall(destination.rsplit(os.path.sep, 1)[0])
os.remove(destination)
# Author: aqeelanwar
# Created: 4 May,2020, 1:30 AM
# Email: aqeel.anwar@gatech.edu
import numpy as np
from numpy.linalg import eig, inv
def fitEllipse(x,y):
x = x[:,np.newaxis]
y = y[:,np.newaxis]
D = np.hstack((x*x, x*y, y*y, x, y, np.ones_like(x)))
S = np.dot(D.T,D)
C = np.zeros([6,6])
C[0,2] = C[2,0] = 2; C[1,1] = -1
E, V = eig(np.dot(inv(S), C))
n = np.argmax(np.abs(E))
a = V[:,n]
return a
def ellipse_center(a):
b,c,d,f,g,a = a[1]/2, a[2], a[3]/2, a[4]/2, a[5], a[0]
num = b*b-a*c
x0=(c*d-b*f)/num
y0=(a*f-b*d)/num
return np.array([x0,y0])
def ellipse_angle_of_rotation( a ):
b,c,d,f,g,a = a[1]/2, a[2], a[3]/2, a[4]/2, a[5], a[0]
return 0.5*np.arctan(2*b/(a-c))
def ellipse_axis_length( a ):
b,c,d,f,g,a = a[1]/2, a[2], a[3]/2, a[4]/2, a[5], a[0]
up = 2*(a*f*f+c*d*d+g*b*b-2*b*d*f-a*c*g)
down1=(b*b-a*c)*( (c-a)*np.sqrt(1+4*b*b/((a-c)*(a-c)))-(c+a))
down2=(b*b-a*c)*( (a-c)*np.sqrt(1+4*b*b/((a-c)*(a-c)))-(c+a))
res1=np.sqrt(up/down1)
res2=np.sqrt(up/down2)
return np.array([res1, res2])
def ellipse_angle_of_rotation2( a ):
b,c,d,f,g,a = a[1]/2, a[2], a[3]/2, a[4]/2, a[5], a[0]
if b == 0:
if a > c:
return 0
else:
return np.pi/2
else:
if a > c:
return np.arctan(2*b/(a-c))/2
else:
return np.pi/2 + np.arctan(2*b/(a-c))/2
# a = fitEllipse(x,y)
# center = ellipse_center(a)
# #phi = ellipse_angle_of_rotation(a)
# phi = ellipse_angle_of_rotation2(a)
# axes = ellipse_axis_length(a)
#
# print("center = ", center)
# print("angle of rotation = ", phi)
# print("axes = ", axes)
# Author: aqeelanwar
# Created: 2 May,2020, 2:49 AM
# Email: aqeel.anwar@gatech.edu
from tkinter import filedialog
from tkinter import *
import cv2, os
mouse_pts = []
def get_mouse_points(event, x, y, flags, param):
global mouseX, mouseY, mouse_pts
if event == cv2.EVENT_LBUTTONDOWN:
mouseX, mouseY = x, y
cv2.circle(mask_im, (x, y), 10, (0, 255, 255), 10)
if "mouse_pts" not in globals():
mouse_pts = []
mouse_pts.append((x, y))
# print("Point detected")
# print((x,y))
root = Tk()
filename = filedialog.askopenfilename(
initialdir="/",
title="Select file",
filetypes=(("PNG files", "*.PNG"), ("png files", "*.png"), ("All files", "*.*")),
)
root.destroy()
filename_split = os.path.split(filename)
folder = filename_split[0]
file = filename_split[1]
file_split = file.split(".")
new_filename = folder + "/" + file_split[0] + "_marked." + file_split[-1]
mask_im = cv2.imread(filename)
cv2.namedWindow("Mask")
cv2.setMouseCallback("Mask", get_mouse_points)
while True:
cv2.imshow("Mask", mask_im)
cv2.waitKey(1)
if len(mouse_pts) == 6:
cv2.destroyWindow("Mask")
break
first_frame_display = False
points = mouse_pts
print(points)
print("----------------------------------------------------------------")
print("Copy the following code and paste it in masks.cfg")
print("----------------------------------------------------------------")
name_points = ["a", "b", "c", "d", "e", "f"]
mask_title = "[" + file_split[0] + "]"
print(mask_title)
print("template: ", filename)
for i in range(len(mouse_pts)):
name = (
"mask_"
+ name_points[i]
+ ": "
+ str(mouse_pts[i][0])
+ ","
+ str(mouse_pts[i][1])
)
print(name)
cv2.imwrite(new_filename, mask_im)
# Author: Aqeel Anwar(ICSRL)
# Created: 9/20/2019, 12:43 PM
# Email: aqeel.anwar@gatech.edu
from configparser import ConfigParser
from dotmap import DotMap
def ConvertIfStringIsInt(input_string):
try:
float(input_string)
try:
if int(input_string) == float(input_string):
return int(input_string)
else:
return float(input_string)
except ValueError:
return float(input_string)
except ValueError:
return input_string
def read_cfg(config_filename="masks/masks.cfg", mask_type="surgical", verbose=False):
parser = ConfigParser()
parser.optionxform = str
parser.read(config_filename)
cfg = DotMap()
section_name = mask_type
if verbose:
hyphens = "-" * int((80 - len(config_filename)) / 2)
print(hyphens + " " + config_filename + " " + hyphens)
# for section_name in parser.sections():
if verbose:
print("[" + section_name + "]")
for name, value in parser.items(section_name):
value = ConvertIfStringIsInt(value)
if name != "template":
cfg[name] = tuple(int(s) for s in value.split(","))
else:
cfg[name] = value
spaces = " " * (30 - len(name))
if verbose:
print(name + ":" + spaces + str(cfg[name]))
return cfg
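# A minimal usage sketch (illustrative): reading the [surgical] section of
# masks/masks.cfg returns a DotMap whose 'template' entry is the template path
# and whose mask_a .. mask_f entries are (x, y) tuples, e.g. cfg.mask_a == (21, 97).
if __name__ == "__main__":
    cfg = read_cfg(config_filename="masks/masks.cfg", mask_type="surgical", verbose=True)
    print(cfg.template, cfg.mask_a)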
{
"cells": [
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"import torch.nn as nn\n",
"from torch.nn import Parameter\n",
"import torch.nn.functional as F\n",
"from torchvision import transforms as tf\n",
"import torch.utils.data as data\n",
"\n",
"import os\n",
"import cv2\n",
"import functools\n",
"import numpy as np\n",
"from PIL import Image\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from models import vgg19"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"model = vgg19(pretrained=True).features[:-2]\n",
"\n",
"model = model.eval()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Sequential(\n",
" (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (1): ReLU(inplace=True)\n",
" (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (3): ReLU(inplace=True)\n",
" (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
" (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (6): ReLU(inplace=True)\n",
" (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (8): ReLU(inplace=True)\n",
" (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
" (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (11): ReLU(inplace=True)\n",
" (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (13): ReLU(inplace=True)\n",
" (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (15): ReLU(inplace=True)\n",
" (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (17): ReLU(inplace=True)\n",
" (18): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
" (19): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (20): ReLU(inplace=True)\n",
" (21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (22): ReLU(inplace=True)\n",
" (23): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (24): ReLU(inplace=True)\n",
" (25): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (26): ReLU(inplace=True)\n",
" (27): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
" (28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (29): ReLU(inplace=True)\n",
" (30): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (31): ReLU(inplace=True)\n",
" (32): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (33): ReLU(inplace=True)\n",
" (34): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
")"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"img = torch.rand(4,3,256,256)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"torch.Size([4, 512, 8, 8])"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"out = model(img)\n",
"out.shape"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"class GatedConv2d(nn.Module):\n",
" def __init__(self, in_channels, out_channels, kernel_size, stride = 1, padding = 0, dilation = 1, activation = 'lrelu', norm = 'in'):\n",
" super(GatedConv2d, self).__init__()\n",
" self.pad = nn.ZeroPad2d(padding)\n",
" if norm is not None:\n",
" self.norm = nn.InstanceNorm2d(out_channels)\n",
" else:\n",
" self.norm = None\n",
" \n",
" if activation == 'tanh':\n",
" self.activation = nn.Tanh()\n",
" else:\n",
" self.activation = nn.LeakyReLU(0.2, inplace = True)\n",
" \n",
" \n",
" self.conv2d = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding = 0, dilation = dilation)\n",
" self.mask_conv2d = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding = 0, dilation = dilation)\n",
" self.sigmoid = torch.nn.Sigmoid()\n",
" \n",
" def forward(self, x):\n",
" x = self.pad(x)\n",
" conv = self.conv2d(x)\n",
" mask = self.mask_conv2d(x)\n",
" gated_mask = self.sigmoid(mask)\n",
" x = conv * gated_mask\n",
" if self.norm:\n",
" x = self.norm(x)\n",
" if self.activation:\n",
" x = self.activation(x)\n",
" return x\n",
"\n",
"class TransposeGatedConv2d(nn.Module):\n",
" def __init__(self, in_channels, out_channels, kernel_size, stride = 1, padding = 0, dilation = 1, norm=None, scale_factor = 2):\n",
" super(TransposeGatedConv2d, self).__init__()\n",
" # Initialize the conv scheme\n",
" self.scale_factor = scale_factor\n",
" self.gated_conv2d = GatedConv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, norm=norm)\n",
" \n",
" def forward(self, x):\n",
" x = F.interpolate(x, scale_factor = self.scale_factor, mode = 'nearest')\n",
" x = self.gated_conv2d(x)\n",
" return x"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"class GatedGenerator(nn.Module):\n",
" def __init__(self, in_channels=4, latent_channels=64, out_channels=3):\n",
" super(GatedGenerator, self).__init__()\n",
" self.coarse = nn.Sequential(\n",
" # encoder\n",
" GatedConv2d(in_channels, latent_channels, 7, 1, 3, norm = None),\n",
" GatedConv2d(latent_channels, latent_channels * 2, 4, 2, 1),\n",
" GatedConv2d(latent_channels * 2, latent_channels * 4, 3, 1, 1),\n",
" GatedConv2d(latent_channels * 4, latent_channels * 4, 4, 2, 1),\n",
" # Bottleneck\n",
" GatedConv2d(latent_channels * 4, latent_channels * 4, 3, 1, 1),\n",
" GatedConv2d(latent_channels * 4, latent_channels * 4, 3, 1, 1),\n",
" GatedConv2d(latent_channels * 4, latent_channels * 4, 3, 1, 2, dilation = 2),\n",
" GatedConv2d(latent_channels * 4, latent_channels * 4, 3, 1, 4, dilation = 4),\n",
" GatedConv2d(latent_channels * 4, latent_channels * 4, 3, 1, 8, dilation = 8),\n",
" GatedConv2d(latent_channels * 4, latent_channels * 4, 3, 1, 16, dilation = 16),\n",
" GatedConv2d(latent_channels * 4, latent_channels * 4, 3, 1, 1),\n",
" GatedConv2d(latent_channels * 4, latent_channels * 4, 3, 1, 1),\n",
" # decoder\n",
" TransposeGatedConv2d(latent_channels * 4, latent_channels * 2, 3, 1, 1),\n",
" GatedConv2d(latent_channels * 2, latent_channels * 2, 3, 1, 1),\n",
" TransposeGatedConv2d(latent_channels * 2, latent_channels, 3, 1, 1),\n",
" GatedConv2d(latent_channels, out_channels, 7, 1, 3, activation = 'tanh', norm = None)\n",
" )\n",
" self.refinement = nn.Sequential(\n",
" # encoder\n",
" GatedConv2d(in_channels, latent_channels, 7, 1, 3, norm = None),\n",
" GatedConv2d(latent_channels, latent_channels * 2, 4, 2, 1),\n",
" GatedConv2d(latent_channels * 2, latent_channels * 4, 3, 1, 1),\n",
" GatedConv2d(latent_channels * 4, latent_channels * 4, 4, 2, 1),\n",
" # Bottleneck\n",
" GatedConv2d(latent_channels * 4, latent_channels * 4, 3, 1, 1),\n",
" GatedConv2d(latent_channels * 4, latent_channels * 4, 3, 1, 1),\n",
" GatedConv2d(latent_channels * 4, latent_channels * 4, 3, 1, 2, dilation = 2),\n",
" GatedConv2d(latent_channels * 4, latent_channels * 4, 3, 1, 4, dilation = 4),\n",
" GatedConv2d(latent_channels * 4, latent_channels * 4, 3, 1, 8, dilation = 8),\n",
" GatedConv2d(latent_channels * 4, latent_channels * 4, 3, 1, 16, dilation = 16),\n",
" GatedConv2d(latent_channels * 4, latent_channels * 4, 3, 1, 1),\n",
" GatedConv2d(latent_channels * 4, latent_channels * 4, 3, 1, 1),\n",
" # decoder\n",
" TransposeGatedConv2d(latent_channels * 4, latent_channels * 2, 3, 1, 1),\n",
" GatedConv2d(latent_channels * 2, latent_channels * 2, 3, 1, 1),\n",
" TransposeGatedConv2d(latent_channels * 2, latent_channels, 3, 1, 1),\n",
" GatedConv2d(latent_channels, out_channels, 7, 1, 3, activation = 'tanh', norm = None)\n",
" )\n",
" \n",
" def forward(self, img, mask):\n",
" # img: entire img\n",
" # mask: 1 for mask region; 0 for unmask region\n",
" # 1 - mask: unmask\n",
" # img * (1 - mask): ground truth unmask region\n",
" # Coarse\n",
" \n",
" first_masked_img = img * (1 - mask) + mask\n",
" first_in = torch.cat((first_masked_img, mask), 1) # in: [B, 4, H, W]\n",
" first_out = self.coarse(first_in) # out: [B, 3, H, W]\n",
" # Refinement\n",
" second_masked_img = img * (1 - mask) + first_out * mask\n",
" second_in = torch.cat((second_masked_img, mask), 1) # in: [B, 4, H, W]\n",
" second_out = self.refinement(second_in) # out: [B, 3, H, W]\n",
" return first_out, second_out"
]
},
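{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# A minimal sanity-check sketch (illustrative only): run the generator on a\n",
"# random image and a random binary mask and confirm that both the coarse and\n",
"# the refined outputs keep the input resolution.\n",
"gen = GatedGenerator()\n",
"dummy_img = torch.rand(1, 3, 256, 256)\n",
"dummy_mask = (torch.rand(1, 1, 256, 256) > 0.5).float()\n",
"coarse_out, refined_out = gen(dummy_img, dummy_mask)\n",
"print(coarse_out.shape, refined_out.shape)  # both expected: [1, 3, 256, 256]"
]
},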
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"class NLayerDiscriminator(nn.Module):\n",
" def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d, use_sigmoid=False):\n",
" super(NLayerDiscriminator, self).__init__()\n",
" if type(norm_layer) == functools.partial:\n",
" use_bias = norm_layer.func == nn.InstanceNorm2d\n",
" else:\n",
" use_bias = norm_layer == nn.InstanceNorm2d\n",
"\n",
" kw = 4\n",
" padw = 1\n",
" sequence = [\n",
" nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw),\n",
" nn.LeakyReLU(0.2, True)\n",
" ]\n",
"\n",
" nf_mult = 1\n",
" nf_mult_prev = 1\n",
" for n in range(1, n_layers):\n",
" nf_mult_prev = nf_mult\n",
" nf_mult = min(2**n, 8)\n",
" sequence += [\n",
" nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult,\n",
" kernel_size=kw, stride=2, padding=padw, bias=use_bias),\n",
" norm_layer(ndf * nf_mult),\n",
" nn.LeakyReLU(0.2, True)\n",
" ]\n",
"\n",
" nf_mult_prev = nf_mult\n",
" nf_mult = min(2**n_layers, 8)\n",
" sequence += [\n",
" nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult,\n",
" kernel_size=kw, stride=1, padding=padw, bias=use_bias),\n",
" norm_layer(ndf * nf_mult),\n",
" nn.LeakyReLU(0.2, True)\n",
" ]\n",
"\n",
" sequence += [nn.Conv2d(ndf * nf_mult, 1, kernel_size=kw, stride=1, padding=padw)]\n",
"\n",
" if use_sigmoid:\n",
" sequence += [nn.Sigmoid()]\n",
"\n",
" self.model = nn.Sequential(*sequence)\n",
"\n",
" def forward(self, input):\n",
" return self.model(input)"
]
},
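{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Quick shape check (illustrative): with the default 3-layer PatchGAN settings,\n",
"# a 3-channel 256x256 input is mapped to a 30x30 patch score map.\n",
"disc = NLayerDiscriminator(input_nc=3)\n",
"print(disc(torch.rand(1, 3, 256, 256)).shape)  # expected: torch.Size([1, 1, 30, 30])"
]
},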
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"class PerceptualNet(nn.Module):\n",
" def __init__(self):\n",
" super(PerceptualNet, self).__init__()\n",
" self.features = nn.Sequential(\n",
" nn.Conv2d(3, 64, 3, 1, 1),\n",
" nn.ReLU(inplace = True),\n",
" nn.Conv2d(64, 64, 3, 1, 1),\n",
" nn.ReLU(inplace = True),\n",
" nn.MaxPool2d(2, 2),\n",
" nn.Conv2d(64, 128, 3, 1, 1),\n",
" nn.ReLU(inplace = True),\n",
" nn.Conv2d(128, 128, 3, 1, 1),\n",
" nn.ReLU(inplace = True),\n",
" nn.MaxPool2d(2, 2),\n",
" nn.Conv2d(128, 256, 3, 1, 1),\n",
" nn.ReLU(inplace = True),\n",
" nn.Conv2d(256, 256, 3, 1, 1),\n",
" nn.ReLU(inplace = True),\n",
" nn.Conv2d(256, 256, 3, 1, 1),\n",
" nn.MaxPool2d(2, 2),\n",
" nn.Conv2d(256, 512, 3, 1, 1),\n",
" nn.ReLU(inplace = True),\n",
" nn.Conv2d(512, 512, 3, 1, 1),\n",
" nn.ReLU(inplace = True),\n",
" nn.Conv2d(512, 512, 3, 1, 1)\n",
" )\n",
"\n",
" def forward(self, x):\n",
" x = self.features(x)\n",
" return x"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"class GANLoss(nn.Module):\n",
" def __init__(self, target_real_label=1.0, target_fake_label=0.0):\n",
" super(GANLoss, self).__init__()\n",
" self.register_buffer('real_label', torch.tensor(target_real_label))\n",
" self.register_buffer('fake_label', torch.tensor(target_fake_label))\n",
" self.loss = nn.BCELoss()\n",
"\n",
" def get_target_tensor(self, input, target_is_real):\n",
" if target_is_real:\n",
" target_tensor = self.real_label\n",
" else:\n",
" target_tensor = self.fake_label\n",
" return target_tensor.expand_as(input)\n",
"\n",
" def __call__(self, input, target_is_real):\n",
" target_tensor = self.get_target_tensor(input, target_is_real)\n",
" return self.loss(input, target_tensor)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"class InpaintDataset(data.Dataset):\n",
" def __init__(self, img_dir):\n",
" self.img_dir = img_dir\n",
" self.load_images()\n",
" \n",
" def load_images(self):\n",
" self.fns =[]\n",
" img_paths = sorted(os.listdir(self.img_dir))\n",
" for path in img_paths:\n",
" self.fns.append(os.path.join(self.img_dir, path))\n",
" \n",
" def __getitem__(self, index):\n",
" img_path = self.fns[index]\n",
" img = cv2.imread(img_path)\n",
" img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n",
" img = cv2.resize(img, (256,256))\n",
" \n",
" mask = self.random_ff_mask()\n",
" img = torch.from_numpy(img.astype(np.float32) / 255.0).permute(2, 0, 1).contiguous()\n",
" mask = torch.from_numpy(mask.astype(np.float32)).contiguous()\n",
" return img, mask\n",
" \n",
" def collate_fn(self, batch):\n",
" imgs = torch.stack([i[0] for i in batch])\n",
" masks = torch.stack([i[1] for i in batch])\n",
" return {\n",
" 'imgs': imgs,\n",
" 'masks': masks\n",
" }\n",
" \n",
" def __len__(self):\n",
" return len(self.fns)\n",
" \n",
" def random_ff_mask(self, shape =256 , max_angle = 4, max_len = 40, max_width = 10, times = 15):\n",
" \"\"\"Generate a random free form mask with configuration.\n",
" Args:\n",
" config: Config should have configuration including IMG_SHAPES,\n",
" VERTICAL_MARGIN, HEIGHT, HORIZONTAL_MARGIN, WIDTH.\n",
" Returns:\n",
" tuple: (top, left, height, width)\n",
" \"\"\"\n",
" height = shape\n",
" width = shape\n",
" mask = np.zeros((height, width), np.float32)\n",
" times = np.random.randint(times)\n",
" for i in range(times):\n",
" start_x = np.random.randint(width)\n",
" start_y = np.random.randint(height)\n",
" for j in range(1 + np.random.randint(5)):\n",
" angle = 0.01 + np.random.randint(max_angle)\n",
" if i % 2 == 0:\n",
" angle = 2 * 3.1415926 - angle\n",
" length = 10 + np.random.randint(max_len)\n",
" brush_w = 5 + np.random.randint(max_width)\n",
" end_x = (start_x + length * np.sin(angle)).astype(np.int32)\n",
" end_y = (start_y + length * np.cos(angle)).astype(np.int32)\n",
" cv2.line(mask, (start_y, start_x), (end_y, end_x), 1.0, brush_w)\n",
" start_x, start_y = end_x, end_y\n",
" return mask.reshape((1, ) + mask.shape).astype(np.float32)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dataset = InpaintDataset(img_dir='datasets/places365standard_easyformat/places365_standard/train/waterfall')\n",
"dataloader = data.DataLoader(dataset, batch_size=4, collate_fn = dataset.collate_fn)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for batch in dataloader:\n",
" imgs = batch['imgs']\n",
" masks = batch['masks']\n",
" \n",
" break"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"device = torch.device('cuda')"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'GANLoss' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-23-bdcc75eef256>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[0mmodel_D\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mNLayerDiscriminator\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m3\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0muse_sigmoid\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[0mmodel_P\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mPerceptualNet\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m \u001b[0mcriterion_adv\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mGANLoss\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 5\u001b[0m \u001b[0mcriterion_rec\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mMSELoss\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[0mcriterion_per\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mL1Loss\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mNameError\u001b[0m: name 'GANLoss' is not defined"
]
}
],
"source": [
"model_G = GatedGenerator()\n",
"model_D = NLayerDiscriminator(3, use_sigmoid=True)\n",
"model_P = PerceptualNet()\n",
"criterion_adv = GANLoss()\n",
"criterion_rec = nn.MSELoss()\n",
"criterion_per = nn.L1Loss()\n",
"optimizer_D = torch.optim.Adam(model_D.parameters(), lr=1e-4)\n",
"optimizer_G = torch.optim.Adam(model_G.parameters(), lr=1e-4)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"torch.save({\n",
" 'D': model_D.state_dict(),\n",
" 'G': model_G.state_dict()\n",
"}, 's.pth')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def count_params(model):\n",
" return sum(p.numel() for p in model.parameters() if p.requires_grad)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(count_params(model_G))\n",
"print(count_params(model_D))\n",
"print(count_params(model_P))"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"def random_ff_mask(shape =256 , max_angle = 4, max_len = 40, max_width = 10, times = 15):\n",
" \"\"\"Generate a random free form mask with configuration.\n",
" Args:\n",
" config: Config should have configuration including IMG_SHAPES,\n",
" VERTICAL_MARGIN, HEIGHT, HORIZONTAL_MARGIN, WIDTH.\n",
" Returns:\n",
" tuple: (top, left, height, width)\n",
" \"\"\"\n",
" height = shape\n",
" width = shape\n",
" mask = np.zeros((height, width), np.float32)\n",
" times = np.random.randint(times)\n",
" for i in range(times):\n",
" start_x = np.random.randint(width)\n",
" start_y = np.random.randint(height)\n",
" for j in range(1 + np.random.randint(5)):\n",
" angle = 0.01 + np.random.randint(max_angle)\n",
" if i % 2 == 0:\n",
" angle = 2 * 3.1415926 - angle\n",
" length = 10 + np.random.randint(max_len)\n",
" brush_w = 5 + np.random.randint(max_width)\n",
" end_x = (start_x + length * np.sin(angle)).astype(np.int32)\n",
" end_y = (start_y + length * np.cos(angle)).astype(np.int32)\n",
" cv2.line(mask, (start_y, start_x), (end_y, end_x), 1.0, brush_w)\n",
" start_x, start_y = end_x, end_y\n",
" return mask.reshape((1, ) + mask.shape).astype(np.float32)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"img = cv2.imread('datasets/places365standard_easyformat/places365_standard/train/waterfall/00000003.jpg')\n",
"img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n",
"img = cv2.resize(img, (256, 256))\n",
"img = torch.from_numpy(img.astype(np.float32) / 255.0).permute(2, 0, 1).contiguous()\n",
"img_tensor = img.unsqueeze(0)\n",
"mask = random_ff_mask()\n",
"mask = torch.from_numpy(mask).contiguous().unsqueeze(0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def visualize(img):\n",
" np_img = img.squeeze(0).detach().cpu().numpy()\n",
" return np_img.transpose(1, 2, 0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.imshow(visualize(first_out_wholeimg))"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"first_out, second_out = model_G(img_tensor, mask)\n",
"\n",
"first_out_wholeimg = img_tensor * (1 - mask) + first_out * mask \n",
"second_out_wholeimg = img_tensor * (1 - mask) + second_out * mask"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Train discriminator\n",
"optimizer_D.zero_grad()\n",
"\n",
"fake_D = model_D(second_out_wholeimg.detach())\n",
"real_D = model_D(img_tensor)\n",
"\n",
"loss_fake_D = criterion_adv(fake_D, target_is_real=False)\n",
"loss_real_D = criterion_adv(real_D, target_is_real=True)\n",
"\n",
"loss_D = (loss_fake_D + loss_real_D) *0.5\n",
"\n",
"loss_D.backward()\n",
"optimizer_D.step()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# Train Generator\n",
"\n",
"optimizer_G.zero_grad()\n",
"\n",
"fake_D = model_D(second_out_wholeimg)\n",
"G_loss = criterion_adv(fake_D, target_is_real=True)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# Reconstruction loss\n",
"\n",
"loss_rec_1 = criterion_rec(first_out_wholeimg, img_tensor)\n",
"loss_rec_2 = criterion_rec(second_out_wholeimg, img_tensor)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# Perceptual loss\n",
"\n",
"img_featuremaps = model_P(img_tensor) # feature maps\n",
"second_out_wholeimg_featuremaps = model_P(second_out_wholeimg)\n",
"\n",
"loss_P = criterion_per(second_out_wholeimg_featuremaps, img_featuremaps)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'lambda_G' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-18-dbfe5f51e2fc>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mloss\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mlambda_G\u001b[0m \u001b[1;33m*\u001b[0m \u001b[0mG_loss\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mlambda_rec_1\u001b[0m \u001b[1;33m*\u001b[0m \u001b[0mloss_rec_1\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mlambda_rec_2\u001b[0m \u001b[1;33m*\u001b[0m \u001b[0mloss_rec_2\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mlambda_per\u001b[0m \u001b[1;33m*\u001b[0m \u001b[0mloss_P\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0mloss\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[0moptimizer_G\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mNameError\u001b[0m: name 'lambda_G' is not defined"
]
}
],
"source": [
"loss = lambda_G * G_loss + lambda_rec_1 * loss_rec_1 + lambda_rec_2 * loss_rec_2 + lambda_per * loss_P\n",
"loss.backward()\n",
"optimizer_G.step()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
from .config import Config
\ No newline at end of file
import yaml
class Config():
def __init__(self, yaml_path):
        with open(yaml_path) as yaml_file:
            self._attr = yaml.load(yaml_file, Loader=yaml.FullLoader)['settings']
def __getattr__(self, attr):
try:
return self._attr[attr]
except KeyError:
return None
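# --- Hedged usage sketch (not part of the original repo) -----------------------
# Minimal, self-contained example of how Config is consumed: it exposes every key
# of the YAML `settings:` block as an attribute and returns None for missing keys.
# The temporary file and key names below are illustrative assumptions; the real
# configs live under ./configs.
if __name__ == '__main__':
    import os
    import tempfile

    demo = "settings:\n  lr: 0.001\n  batch_size: 2\n"
    with tempfile.NamedTemporaryFile('w', suffix='.yaml', delete=False) as tmp:
        tmp.write(demo)
        tmp_path = tmp.name

    cfg = Config(tmp_path)
    print(cfg.lr, cfg.batch_size)   # 0.001 2
    print(cfg.num_workers)          # None (missing key handled by __getattr__)
    os.remove(tmp_path)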
settings:
root_dir: "./datasets/celeba/images/"
checkpoint_path: "weights"
sample_folder: "sample"
cuda: True
lr: 0.001
batch_size: 2
num_workers: 4
step_iters: [10000, 15000, 20000]
gamma: 0.1
d_num_layers: 3
visualize_per_iter: 500
save_per_iter: 500
print_per_iter: 10
num_epochs: 100
lambda_G: 1.0
lambda_rec_1: 100.0
lambda_rec_2: 100.0
lambda_per: 10.0
img_size: 512
settings:
root_dir: "./datasets/places365_10classes"
checkpoint_path: "/content/drive/MyDrive/weights/Places365 Inpainting/phase 3"
sample_folder: "/content/drive/MyDrive/results/Places365 Inpainting/phase 3"
cuda: True
lr: 0.0001
batch_size: 8
num_workers: 4
step_iters: [50000, 75000, 100000]
gamma: 0.1
d_num_layers: 3
visualize_per_iter: 500
save_per_iter: 500
print_per_iter: 10
num_epochs: 100
lambda_G: 0.3
lambda_rec_1: 10.0
lambda_rec_2: 10.0
lambda_per: 1.0
img_size: 256
max_angle: 4
max_len: 50
max_width: 30
times: 15
settings:
root_dir: "./datasets/celeba/images/"
train_anns: "./datasets/celeba/annotations/train.csv"
val_anns: "./datasets/celeba/annotations/val.csv"
checkpoint_path: "weights" #"/content/drive/MyDrive/weights/Places365 Inpainting/unet/phase 1"
sample_folder: "sample" #"/content/drive/MyDrive/results/Places365 Inpainting/unet/phase 1"
cuda: True
lr: 0.001
batch_size: 4
num_workers: 4
step_iters: [50000, 75000, 100000]
gamma: 0.1
visualize_per_iter: 1000
save_per_iter: 1000
print_per_iter: 10
num_epochs: 100
img_size: 512
from .dataset import Places365Dataset, FacemaskDataset
from .dataset_seg import FacemaskSegDataset
import os
import csv
img_dir = "./datasets/celeba/images/celeba512_30k_masked"
mask_dir = "./datasets/celeba/images/celeba512_30k_binary"
# List each directory once (sorted, so the train/val split is deterministic and
# the masked/binary files stay paired by name) instead of calling os.listdir()
# again for every row.
img_names = sorted(os.listdir(img_dir))
mask_names = sorted(os.listdir(mask_dir))
f = open("./datasets/celeba/annotations/train.csv", "w", newline="")
wr = csv.writer(f)
wr.writerow(["_", "img_name", "mask_name"])
for i in range(23304):
    wr.writerow([i, "celeba512_30k_masked/" + img_names[i], "celeba512_30k_binary/" + mask_names[i]])
f.close()
f = open("./datasets/celeba/annotations/val.csv", "w", newline="")
wr = csv.writer(f)
wr.writerow(["_", "img_name", "mask_name"])
for i in range(23304, 29131):
    wr.writerow([i, "celeba512_30k_masked/" + img_names[i], "celeba512_30k_binary/" + mask_names[i]])
f.close()
import os
import torch
import torch.nn as nn
import torch.utils.data as data
import cv2
import numpy as np
from tqdm import tqdm
class Places365Dataset(data.Dataset):
def __init__(self, cfg):
self.root_dir = cfg.root_dir
self.cfg = cfg
self.load_images()
def load_images(self):
self.fns =[]
idx = 0
img_paths = os.listdir(self.root_dir)
for cls_id in img_paths:
paths = os.path.join(self.root_dir, cls_id)
file_paths = os.listdir(paths)
for img_name in file_paths:
filename = os.path.join(paths, img_name)
self.fns.append(filename)
def __getitem__(self, index):
img_path = self.fns[index]
img = cv2.imread(img_path)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = cv2.resize(img, (self.cfg.img_size, self.cfg.img_size))
mask = self.random_ff_mask(
shape = self.cfg.img_size,
max_angle = self.cfg.max_angle,
max_len = self.cfg.max_len,
max_width = self.cfg.max_width,
times = self.cfg.times)
img = torch.from_numpy(img.astype(np.float32) / 255.0).permute(2, 0, 1).contiguous()
mask = torch.from_numpy(mask.astype(np.float32)).contiguous()
return img, mask
def collate_fn(self, batch):
imgs = torch.stack([i[0] for i in batch])
masks = torch.stack([i[1] for i in batch])
return {
'imgs': imgs,
'masks': masks
}
def __len__(self):
return len(self.fns)
    def random_ff_mask(self, shape=256, max_angle=4, max_len=50, max_width=20, times=15):
        """Generate a random free-form (brush-stroke) mask.
        Args:
            shape: height/width of the square mask.
            max_angle, max_len, max_width: bounds on stroke angle, length and thickness.
            times: upper bound on the number of strokes.
        Returns:
            np.ndarray of shape (1, shape, shape) with 1.0 inside the masked strokes.
        """
height = shape
width = shape
mask = np.zeros((height, width), np.float32)
times = np.random.randint(10, times)
for i in range(times):
start_x = np.random.randint(width)
start_y = np.random.randint(height)
for j in range(1 + np.random.randint(5)):
angle = 0.01 + np.random.randint(max_angle)
if i % 2 == 0:
angle = 2 * 3.1415926 - angle
length = 10 + np.random.randint(max_len)
brush_w = 5 + np.random.randint(max_width)
end_x = (start_x + length * np.sin(angle)).astype(np.int32)
end_y = (start_y + length * np.cos(angle)).astype(np.int32)
cv2.line(mask, (start_y, start_x), (end_y, end_x), 1.0, brush_w)
start_x, start_y = end_x, end_y
return mask.reshape((1, ) + mask.shape).astype(np.float32)
class FacemaskDataset(data.Dataset):
def __init__(self, cfg):
self.root_dir = cfg.root_dir
self.cfg = cfg
self.mask_folder = os.path.join(self.root_dir, 'celeba512_30k_binary')
self.img_folder = os.path.join(self.root_dir, 'celeba512_30k')
self.load_images()
def load_images(self):
self.fns = []
idx = 0
img_paths = sorted(os.listdir(self.img_folder))
for img_name in img_paths:
mask_name = img_name.split('.')[0]+'_binary.jpg'
img_path = os.path.join(self.img_folder, img_name)
mask_path = os.path.join(self.mask_folder, mask_name)
if os.path.isfile(mask_path):
self.fns.append([img_path, mask_path])
def __getitem__(self, index):
img_path, mask_path = self.fns[index]
img = cv2.imread(img_path)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = cv2.resize(img, (self.cfg.img_size, self.cfg.img_size))
mask = cv2.imread(mask_path, 0)
mask[mask>0]=1.0
mask = np.expand_dims(mask, axis=0)
img = torch.from_numpy(img.astype(np.float32) / 255.0).permute(2, 0, 1).contiguous()
mask = torch.from_numpy(mask.astype(np.float32)).contiguous()
return img, mask
def collate_fn(self, batch):
imgs = torch.stack([i[0] for i in batch])
masks = torch.stack([i[1] for i in batch])
return {
'imgs': imgs,
'masks': masks
}
def __len__(self):
return len(self.fns)
\ No newline at end of file
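# --- Hedged usage sketch (not part of the original repo) -----------------------
# How the datasets above are typically wired to a DataLoader. The stand-in config
# object and directory layout below are assumptions for illustration; the real
# values come from the YAML files in ./configs and require the CelebA data on disk.
if __name__ == '__main__':
    import types

    cfg = types.SimpleNamespace(
        root_dir='./datasets/celeba/images/',  # assumed layout with the two subfolders
        img_size=512,
    )
    dataset = FacemaskDataset(cfg)
    loader = data.DataLoader(dataset, batch_size=2, shuffle=True,
                             collate_fn=dataset.collate_fn)
    batch = next(iter(loader))
    # e.g. torch.Size([2, 3, 512, 512]) torch.Size([2, 1, 512, 512])
    print(batch['imgs'].shape, batch['masks'].shape)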
import os
import torch
import torch.nn as nn
import torch.utils.data as data
import cv2
import numpy as np
from tqdm import tqdm
import pandas as pd
from PIL import Image
class FacemaskSegDataset(data.Dataset):
def __init__(self, cfg, train=True):
self.root_dir = cfg.root_dir
self.cfg = cfg
self.train = train
if self.train:
self.df = pd.read_csv(cfg.train_anns)
else:
self.df = pd.read_csv(cfg.val_anns)
self.load_images()
def load_images(self):
self.fns = []
for idx, rows in self.df.iterrows():
_, img_name, mask_name = rows
img_path = os.path.join(self.root_dir, img_name)
mask_path = os.path.join(self.root_dir, mask_name)
img_path = img_path.replace("\\", "/")
mask_path = mask_path.replace("\\", "/")
if os.path.isfile(mask_path):
self.fns.append([img_path, mask_path])
def __getitem__(self, index):
img_path, mask_path = self.fns[index]
img = cv2.imread(img_path)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = cv2.resize(img, (self.cfg.img_size, self.cfg.img_size))
mask = cv2.imread(mask_path, 0)
mask[mask > 0] = 1.0
mask = np.expand_dims(mask, axis=0)
img = (
torch.from_numpy(img.astype(np.float32) / 255.0)
.permute(2, 0, 1)
.contiguous()
)
mask = torch.from_numpy(mask.astype(np.float32)).contiguous()
return img, mask
def collate_fn(self, batch):
imgs = torch.stack([i[0] for i in batch])
masks = torch.stack([i[1] for i in batch])
return {"imgs": imgs, "masks": masks}
def __len__(self):
return len(self.fns)
import torch
import torch.nn as nn
from torchvision.utils import save_image
import numpy as np
from PIL import Image
import cv2
from models import UNetSemantic, GatedGenerator
import argparse
from configs import Config
class Predictor():
def __init__(self, cfg):
self.cfg = cfg
self.device = torch.device('cuda:0' if cfg.cuda else 'cpu')
self.masking = UNetSemantic().to(self.device)
        self.masking.load_state_dict(torch.load('weights/model_segm_19_135000.pth', map_location='cpu'))
#self.masking.eval()
self.inpaint = GatedGenerator().to(self.device)
self.inpaint.load_state_dict(torch.load('weights/model_6_100000.pth', map_location='cpu')['G'])
self.inpaint.eval()
def save_image(self, img_list, save_img_path, nrow):
img_list = [i.clone().cpu() for i in img_list]
imgs = torch.stack(img_list, dim=1)
imgs = imgs.view(-1, *list(imgs.size())[2:])
save_image(imgs, save_img_path, nrow = nrow)
print(f"Save image to {save_img_path}")
def predict(self, image, outpath='sample/results.png'):
outpath=f'sample/results_{image}.png'
image = 'sample/'+image
img = cv2.imread(image+'_masked.jpg')
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = cv2.resize(img, (self.cfg.img_size, self.cfg.img_size))
img = torch.from_numpy(img.astype(np.float32) / 255.0).permute(2, 0, 1).contiguous()
img = img.unsqueeze(0).to(self.device)
img_ori = cv2.imread(image+'.jpg')
img_ori = cv2.cvtColor(img_ori, cv2.COLOR_BGR2RGB)
img_ori = cv2.resize(img_ori, (self.cfg.img_size, self.cfg.img_size))
img_ori = torch.from_numpy(img_ori.astype(np.float32) / 255.0).permute(2, 0, 1).contiguous()
img_ori = img_ori.unsqueeze(0)
with torch.no_grad():
outputs = self.masking(img)
_, out = self.inpaint(img, outputs)
inpaint = img * (1 - outputs) + out * outputs
masks = img * (1 - outputs) + outputs #torch.cat([outputs, outputs, outputs], dim=1)
self.save_image([img, masks, inpaint, img_ori], outpath, nrow=4)
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Inpainting inference on a single image')
    parser.add_argument('--image', default=None, type=str, help='image name (stem) inside the sample folder')
    parser.add_argument('config', default='config', type=str, help='config file name under ./configs (without .yaml)')
args = parser.parse_args()
config = Config(f'./configs/{args.config}.yaml')
model = Predictor(config)
model.predict(args.image)
\ No newline at end of file
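# --- Hedged usage note (not part of the original repo) --------------------------
# predict.py expects a pair of files in ./sample, e.g. sample/foo_masked.jpg (the
# occluded input) and sample/foo.jpg (the reference), plus the two checkpoints
# hard-coded above. A typical invocation would then be
#
#     python predict.py facemask --image foo
#
# where `facemask` names ./configs/facemask.yaml and `foo` is the image stem; both
# names are assumptions for illustration. The result grid
# (input / predicted mask / inpainted / reference) is written to sample/results_foo.png.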
from .loggers import *
\ No newline at end of file
import os
import numpy as np
from torch.utils.tensorboard import SummaryWriter
from datetime import datetime
class Logger():
"""
Logger for Tensorboard visualization
    :param log_dir: Directory where TensorBoard event files are written
"""
def __init__(self, log_dir=None):
self.log_dir = log_dir
if self.log_dir is None:
self.log_dir = os.path.join('loggers/runs',datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
if not os.path.exists(self.log_dir):
            os.makedirs(self.log_dir)
self.writer = SummaryWriter(log_dir=self.log_dir)
self.iters = {}
def write(self, tags, values):
"""
Write a log to specified directory
:param tags: (str) tag for log
:param values: (number) value for corresponding tag
"""
        if not isinstance(tags, list):
            tags = [tags]
        if not isinstance(values, list):
            values = [values]
for i, (tag, value) in enumerate(zip(tags,values)):
if tag not in self.iters.keys():
self.iters[tag] = 0
self.writer.add_scalar(tag, value, self.iters[tag])
self.iters[tag] += 1
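# --- Hedged usage sketch (not part of the original repo) -----------------------
# Minimal example of logging a pair of scalars; each tag keeps its own step
# counter inside the Logger. The tag names are illustrative assumptions.
if __name__ == '__main__':
    logger = Logger()  # writes to loggers/runs/<timestamp>
    for step in range(3):
        logger.write(['loss/G', 'loss/D'], [1.0 / (step + 1), 0.5])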
from .adversarial import GANLoss
from .ssim import SSIM
from .dice import DiceLoss
\ No newline at end of file
import torch
import torch.nn as nn
class GANLoss(nn.Module):
def __init__(self, target_real_label=1.0, target_fake_label=0.0):
super(GANLoss, self).__init__()
self.register_buffer('real_label', torch.tensor(target_real_label))
self.register_buffer('fake_label', torch.tensor(target_fake_label))
self.loss = nn.MSELoss()
def get_target_tensor(self, input, target_is_real):
if target_is_real:
target_tensor = self.real_label
else:
target_tensor = self.fake_label
return target_tensor.expand_as(input)
def __call__(self, input, target_is_real):
target_tensor = self.get_target_tensor(input, target_is_real).to(input.device)
return self.loss(input, target_tensor)
\ No newline at end of file
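# --- Hedged usage sketch (not part of the original repo) -----------------------
# This GANLoss is the least-squares (MSE) variant, so the discriminator is used
# without a final sigmoid. The shapes below are illustrative assumptions for a
# patch discriminator output.
if __name__ == '__main__':
    criterion_adv = GANLoss(target_real_label=0.9, target_fake_label=0.1)
    fake_logits = torch.randn(2, 1, 30, 30)                       # patch map
    loss_g = criterion_adv(fake_logits, target_is_real=True)      # generator step
    loss_d_fake = criterion_adv(fake_logits, target_is_real=False)
    print(loss_g.item(), loss_d_fake.item())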
import torch
import torch.nn as nn
class DiceLoss(nn.Module):
"""
Dice loss of binary class
Args:
smooth: A float number to smooth loss, and avoid NaN error, default: 1
p: Denominator value: \sum{x^p} + \sum{y^p}, default: 2
predict: A tensor of shape [N, *]
target: A tensor of shape same with predict
reduction: Reduction method to apply, return mean over batch if 'mean',
return sum if 'sum', return a tensor of shape [N,] if 'none'
Returns:
Loss tensor according to arg reduction
Raise:
Exception if unexpected reduction
"""
def __init__(self, smooth=1, p=2, reduction='mean'):
super(DiceLoss, self).__init__()
self.smooth = smooth
self.p = p
self.reduction = reduction
def forward(self, predict, target):
assert predict.shape[0] == target.shape[0], "predict & target batch size don't match"
predict = predict.contiguous().view(predict.shape[0], -1)
target = target.contiguous().view(target.shape[0], -1)
num = torch.sum(torch.mul(predict, target), dim=1) + self.smooth
den = torch.sum(predict.pow(self.p) + target.pow(self.p), dim=1) + self.smooth
loss = 1 - num / den
if self.reduction == 'mean':
return loss.mean()
elif self.reduction == 'sum':
return loss.sum()
elif self.reduction == 'none':
return loss
else:
raise Exception('Unexpected reduction {}'.format(self.reduction))
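# --- Hedged usage sketch (not part of the original repo) -----------------------
# DiceLoss expects probabilities in [0, 1] (e.g. after a sigmoid) and a binary
# target of the same shape; the tensors below are illustrative assumptions.
if __name__ == '__main__':
    criterion = DiceLoss()
    predict = torch.rand(2, 1, 64, 64)                    # already sigmoid-activated
    target = (torch.rand(2, 1, 64, 64) > 0.5).float()     # binary ground truth
    print(criterion(predict, target).item())               # value in [0, 1]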
#Source: https://github.com/Po-Hsun-Su/pytorch-ssim.git
import torch
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
from math import exp
def gaussian(window_size, sigma):
gauss = torch.Tensor([exp(-(x - window_size//2)**2/float(2*sigma**2)) for x in range(window_size)])
return gauss/gauss.sum()
def create_window(window_size, channel):
_1D_window = gaussian(window_size, 1.5).unsqueeze(1)
_2D_window = _1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0)
window = Variable(_2D_window.expand(channel, 1, window_size, window_size).contiguous())
return window
def _ssim(img1, img2, window, window_size, channel, size_average = True):
mu1 = F.conv2d(img1, window, padding = window_size//2, groups = channel)
mu2 = F.conv2d(img2, window, padding = window_size//2, groups = channel)
mu1_sq = mu1.pow(2)
mu2_sq = mu2.pow(2)
mu1_mu2 = mu1*mu2
sigma1_sq = F.conv2d(img1*img1, window, padding = window_size//2, groups = channel) - mu1_sq
sigma2_sq = F.conv2d(img2*img2, window, padding = window_size//2, groups = channel) - mu2_sq
sigma12 = F.conv2d(img1*img2, window, padding = window_size//2, groups = channel) - mu1_mu2
C1 = 0.01**2
C2 = 0.03**2
ssim_map = ((2*mu1_mu2 + C1)*(2*sigma12 + C2))/((mu1_sq + mu2_sq + C1)*(sigma1_sq + sigma2_sq + C2))
if size_average:
return ssim_map.mean()
else:
return ssim_map.mean(1).mean(1).mean(1)
class SSIM(torch.nn.Module):
def __init__(self, window_size = 11, size_average = True):
super(SSIM, self).__init__()
self.window_size = window_size
self.size_average = size_average
self.channel = 1
self.window = create_window(window_size, self.channel)
def forward(self, img1, img2):
(_, channel, _, _) = img1.size()
if channel == self.channel and self.window.data.type() == img1.data.type():
window = self.window
else:
window = create_window(self.window_size, channel)
if img1.is_cuda:
window = window.cuda(img1.get_device())
window = window.type_as(img1)
self.window = window
self.channel = channel
return _ssim(img1, img2, window, self.window_size, channel, self.size_average)
def ssim(img1, img2, window_size = 11, size_average = True):
(_, channel, _, _) = img1.size()
window = create_window(window_size, channel)
if img1.is_cuda:
window = window.cuda(img1.get_device())
window = window.type_as(img1)
return _ssim(img1, img2, window, window_size, channel, size_average)
\ No newline at end of file
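# --- Hedged usage sketch (not part of the original repo) -----------------------
# ssim() returns a similarity in (0, 1]; the trainer uses (1 - SSIM) as a loss
# term. The tensors below are illustrative assumptions.
if __name__ == '__main__':
    img1 = torch.rand(1, 3, 128, 128)
    img2 = img1.clone()
    print(ssim(img1, img2).item())                          # ~1.0 for identical images
    print(ssim(img1, torch.rand(1, 3, 128, 128)).item())    # lower for unrelated images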
from .dicecoeff import DiceScore
from .pixelacc import PixelAccuracy
\ No newline at end of file
import torch
import torch.nn as nn
import numpy as np
class DiceScore():
def __init__(self, num_classes, ignore_index = None, eps=1e-6, thresh = 0.5):
self.thresh = thresh
self.num_classes = num_classes
self.pred_type = "multi" if num_classes > 1 else "binary"
if num_classes == 1:
self.num_classes+=1
self.ignore_index = ignore_index
self.eps = eps
self.scores_list = np.zeros(self.num_classes)
self.reset()
def compute(self, outputs, targets):
# outputs: (batch, num_classes, W, H)
# targets: (batch, num_classes, W, H)
batch_size, _ , w, h = outputs.shape
if len(targets.shape) == 3:
targets = targets.unsqueeze(1)
one_hot_targets = torch.zeros(batch_size, self.num_classes, h, w)
one_hot_predicts = torch.zeros(batch_size, self.num_classes, h, w)
if self.pred_type == 'binary':
predicts = (outputs > self.thresh).float()
elif self.pred_type =='multi':
predicts = torch.argmax(outputs, dim=1).unsqueeze(1)
one_hot_targets.scatter_(1, targets.long(), 1)
one_hot_predicts.scatter_(1, predicts.long(), 1)
for cl in range(self.num_classes):
cl_pred = one_hot_predicts[:,cl,:,:]
cl_target = one_hot_targets[:,cl,:,:]
score = self.binary_compute(cl_pred, cl_target)
self.scores_list[cl] += sum(score)
def binary_compute(self, predict, target):
# outputs: (batch, 1, W, H)
# targets: (batch, 1, W, H)
intersect = (predict * target).sum((-2,-1))
union = (predict + target).sum((-2,-1))
return (2. * intersect + self.eps) / (union +self.eps)
def reset(self):
self.scores_list = np.zeros(self.num_classes)
self.sample_size = 0
def update(self, outputs, targets):
self.sample_size += outputs.shape[0]
self.compute(outputs, targets)
def value(self):
scores_each_class = self.scores_list / self.sample_size #mean over number of samples
if self.pred_type == 'binary':
scores = scores_each_class[1] # ignore background which is label 0
else:
scores = sum(scores_each_class) / self.num_classes
return np.round(scores, decimals=4)
def summary(self):
class_iou = self.scores_list / self.sample_size #mean
print(f'{self.value()}')
for i, x in enumerate(class_iou):
print(f'\tClass {i}: {x:.4f}')
def __str__(self):
return f'Dice Score: {self.value()}'
    def __len__(self):
        return self.sample_size
\ No newline at end of file
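# --- Hedged usage sketch (not part of the original repo) -----------------------
# DiceScore is accumulated batch by batch with update() and read with value() or
# printed via __str__. With num_classes=1 the outputs are treated as binary
# probabilities thresholded at 0.5. Shapes below are illustrative assumptions.
if __name__ == '__main__':
    metric = DiceScore(num_classes=1)
    outputs = torch.rand(2, 1, 64, 64)                     # model probabilities
    targets = (torch.rand(2, 1, 64, 64) > 0.5).float()     # binary ground truth
    metric.update(outputs, targets)
    print(metric)       # Dice Score: ...
    metric.summary()    # per-class breakdown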
import torch
import torch.nn as nn
import numpy as np
class PixelAccuracy():
def __init__(self, num_classes, ignore_index=None, eps=1e-6, thresh = 0.5):
self.thresh = thresh
self.num_classes = num_classes
self.pred_type = "multi" if num_classes > 1 else "binary"
if num_classes == 1:
self.num_classes+=1
self.ignore_index = ignore_index
self.eps = eps
self.scores_list = np.zeros(self.num_classes)
self.reset()
def compute(self, outputs, targets):
# outputs: (batch, num_classes, W, H)
# targets: (batch, num_classes, W, H)
batch_size, _ , w, h = outputs.shape
if len(targets.shape) == 3:
targets = targets.unsqueeze(1)
one_hot_targets = torch.zeros(batch_size, self.num_classes, h, w)
one_hot_predicts = torch.zeros(batch_size, self.num_classes, h, w)
if self.pred_type == 'binary':
predicts = (outputs > self.thresh).float()
elif self.pred_type =='multi':
predicts = torch.argmax(outputs, dim=1).unsqueeze(1)
one_hot_targets.scatter_(1, targets.long(), 1)
one_hot_predicts.scatter_(1, predicts.long(), 1)
for cl in range(self.num_classes):
cl_pred = one_hot_predicts[:,cl,:,:]
cl_target = one_hot_targets[:,cl,:,:]
score = self.binary_compute(cl_pred, cl_target)
self.scores_list[cl] += sum(score)
def binary_compute(self, predict, target):
# predict: (batch, 1, W, H)
# targets: (batch, 1, W, H)
correct = (predict == target).sum((-2,-1))
total = target.shape[-1] * target.shape[-2]
return (correct + self.eps) *1.0 / (total +self.eps)
def reset(self):
self.scores_list = np.zeros(self.num_classes)
self.sample_size = 0
def update(self, outputs, targets):
self.sample_size += outputs.shape[0]
self.compute(outputs, targets)
def value(self):
scores_each_class = self.scores_list / self.sample_size #mean over number of samples
if self.pred_type == 'binary':
scores = scores_each_class[1] # ignore background which is label 0
else:
scores = sum(scores_each_class) / self.num_classes
return np.round(scores, decimals=4)
def summary(self):
class_iou = self.scores_list / self.sample_size #mean
print(f'{self.value()}')
for i, x in enumerate(class_iou):
print(f'\tClass {i}: {x:.4f}')
def __str__(self):
return f'Pixel Accuracy: {self.value()}'
    def __len__(self):
        return self.sample_size
\ No newline at end of file
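# --- Hedged usage sketch (not part of the original repo) -----------------------
# PixelAccuracy follows the same update()/value() pattern as DiceScore above;
# the tensors are illustrative assumptions.
if __name__ == '__main__':
    metric = PixelAccuracy(num_classes=1)
    outputs = torch.rand(2, 1, 64, 64)
    targets = (torch.rand(2, 1, 64, 64) > 0.5).float()
    metric.update(outputs, targets)
    print(metric)       # Pixel Accuracy: ...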
from .networks import GatedGenerator, NLayerDiscriminator, PerceptualNet
from .unet import UNetSemantic
import torch
import torch.nn as nn
from torch.nn import Parameter
import torch.nn.functional as F
import torch.utils.data as data
import functools
from torchvision.models import vgg19, vgg16
class GatedConv2d(nn.Module):
def __init__(self, in_channels, out_channels, kernel_size, stride = 1, padding = 0, dilation = 1, activation = 'lrelu', norm = 'in'):
super(GatedConv2d, self).__init__()
self.pad = nn.ZeroPad2d(padding)
if norm is not None:
self.norm = nn.InstanceNorm2d(out_channels)
else:
self.norm = None
if activation == 'tanh':
self.activation = nn.Tanh()
else:
self.activation = nn.LeakyReLU(0.2, inplace = True)
self.conv2d = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding = 0, dilation = dilation)
self.mask_conv2d = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding = 0, dilation = dilation)
self.sigmoid = torch.nn.Sigmoid()
def forward(self, x):
x = self.pad(x)
conv = self.conv2d(x)
mask = self.mask_conv2d(x)
gated_mask = self.sigmoid(mask)
x = conv * gated_mask
if self.norm:
x = self.norm(x)
if self.activation:
x = self.activation(x)
return x
class TransposeGatedConv2d(nn.Module):
def __init__(self, in_channels, out_channels, kernel_size, stride = 1, padding = 0, dilation = 1, norm=None, scale_factor = 2):
super(TransposeGatedConv2d, self).__init__()
# Initialize the conv scheme
self.scale_factor = scale_factor
self.gated_conv2d = GatedConv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, norm=norm)
def forward(self, x):
x = F.interpolate(x, scale_factor = self.scale_factor, mode = 'nearest')
x = self.gated_conv2d(x)
return x
class GatedGenerator(nn.Module):
def __init__(self, in_channels=4, latent_channels=64, out_channels=3):
super(GatedGenerator, self).__init__()
self.coarse = nn.Sequential(
# encoder
GatedConv2d(in_channels, latent_channels, 7, 1, 3, norm = None),
GatedConv2d(latent_channels, latent_channels * 2, 4, 2, 1),
GatedConv2d(latent_channels * 2, latent_channels * 4, 3, 1, 1),
GatedConv2d(latent_channels * 4, latent_channels * 4, 4, 2, 1),
# Bottleneck
GatedConv2d(latent_channels * 4, latent_channels * 4, 3, 1, 1),
GatedConv2d(latent_channels * 4, latent_channels * 4, 3, 1, 1),
GatedConv2d(latent_channels * 4, latent_channels * 4, 3, 1, 2, dilation = 2),
GatedConv2d(latent_channels * 4, latent_channels * 4, 3, 1, 4, dilation = 4),
GatedConv2d(latent_channels * 4, latent_channels * 4, 3, 1, 8, dilation = 8),
GatedConv2d(latent_channels * 4, latent_channels * 4, 3, 1, 16, dilation = 16),
GatedConv2d(latent_channels * 4, latent_channels * 4, 3, 1, 1),
GatedConv2d(latent_channels * 4, latent_channels * 4, 3, 1, 1),
# decoder
TransposeGatedConv2d(latent_channels * 4, latent_channels * 2, 3, 1, 1),
GatedConv2d(latent_channels * 2, latent_channels * 2, 3, 1, 1),
TransposeGatedConv2d(latent_channels * 2, latent_channels, 3, 1, 1),
GatedConv2d(latent_channels, out_channels, 7, 1, 3, activation = 'tanh', norm = None)
)
self.refinement = nn.Sequential(
# encoder
GatedConv2d(in_channels, latent_channels, 7, 1, 3, norm = None),
GatedConv2d(latent_channels, latent_channels * 2, 4, 2, 1),
GatedConv2d(latent_channels * 2, latent_channels * 4, 3, 1, 1),
GatedConv2d(latent_channels * 4, latent_channels * 4, 4, 2, 1),
# Bottleneck
GatedConv2d(latent_channels * 4, latent_channels * 4, 3, 1, 1),
GatedConv2d(latent_channels * 4, latent_channels * 4, 3, 1, 1),
GatedConv2d(latent_channels * 4, latent_channels * 4, 3, 1, 2, dilation = 2),
GatedConv2d(latent_channels * 4, latent_channels * 4, 3, 1, 4, dilation = 4),
GatedConv2d(latent_channels * 4, latent_channels * 4, 3, 1, 8, dilation = 8),
GatedConv2d(latent_channels * 4, latent_channels * 4, 3, 1, 16, dilation = 16),
GatedConv2d(latent_channels * 4, latent_channels * 4, 3, 1, 1),
GatedConv2d(latent_channels * 4, latent_channels * 4, 3, 1, 1),
# decoder
TransposeGatedConv2d(latent_channels * 4, latent_channels * 2, 3, 1, 1),
GatedConv2d(latent_channels * 2, latent_channels * 2, 3, 1, 1),
TransposeGatedConv2d(latent_channels * 2, latent_channels, 3, 1, 1),
GatedConv2d(latent_channels, out_channels, 7, 1, 3, activation = 'tanh', norm = None)
)
def forward(self, img, mask):
# img: entire img
# mask: 1 for mask region; 0 for unmask region
# 1 - mask: unmask
# img * (1 - mask): ground truth unmask region
# Coarse
first_masked_img = img * (1 - mask) + mask
first_in = torch.cat((first_masked_img, mask), 1) # in: [B, 4, H, W]
first_out = self.coarse(first_in) # out: [B, 3, H, W]
# Refinement
second_masked_img = img * (1 - mask) + first_out * mask
second_in = torch.cat((second_masked_img, mask), 1) # in: [B, 4, H, W]
second_out = self.refinement(second_in) # out: [B, 3, H, W]
return first_out, second_out
class NLayerDiscriminator(nn.Module):
def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d, use_sigmoid=False):
super(NLayerDiscriminator, self).__init__()
if type(norm_layer) == functools.partial:
use_bias = norm_layer.func == nn.InstanceNorm2d
else:
use_bias = norm_layer == nn.InstanceNorm2d
kw = 4
padw = 1
sequence = [
nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw),
nn.LeakyReLU(0.2, True)
]
nf_mult = 1
nf_mult_prev = 1
for n in range(1, n_layers):
nf_mult_prev = nf_mult
nf_mult = min(2**n, 8)
sequence += [
nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult,
kernel_size=kw, stride=2, padding=padw, bias=use_bias),
norm_layer(ndf * nf_mult),
nn.LeakyReLU(0.2, True)
]
nf_mult_prev = nf_mult
nf_mult = min(2**n_layers, 8)
sequence += [
nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult,
kernel_size=kw, stride=1, padding=padw, bias=use_bias),
norm_layer(ndf * nf_mult),
nn.LeakyReLU(0.2, True)
]
sequence += [nn.Conv2d(ndf * nf_mult, 1, kernel_size=kw, stride=1, padding=padw)]
if use_sigmoid:
sequence += [nn.Sigmoid()]
self.model = nn.Sequential(*sequence)
def forward(self, input):
return self.model(input)
class PerceptualNet(nn.Module):
# https://gist.github.com/alper111/8233cdb0414b4cb5853f2f730ab95a49
def __init__(self, name = "vgg19", resize=True):
super(PerceptualNet, self).__init__()
        blocks = []
        # Load the pretrained backbone once and slice it into feature blocks.
        if name == "vgg19":
            features = vgg19(pretrained=True).features
        elif name == "vgg16":
            features = vgg16(pretrained=True).features
        else:
            raise ValueError(f"wrong model name: {name}")
        blocks.append(features[:4].eval())
        blocks.append(features[4:9].eval())
        blocks.append(features[9:16].eval())
        blocks.append(features[16:23].eval())
        # Freeze the backbone: iterate over parameters, not sub-modules.
        for bl in blocks:
            for p in bl.parameters():
                p.requires_grad = False
self.blocks = torch.nn.ModuleList(blocks)
self.transform = torch.nn.functional.interpolate
self.mean = torch.nn.Parameter(torch.tensor([0.485, 0.456, 0.406]).view(1,3,1,1))
self.std = torch.nn.Parameter(torch.tensor([0.229, 0.224, 0.225]).view(1,3,1,1))
self.resize = resize
def forward(self, inputs, targets):
if inputs.shape[1] != 3:
inputs = inputs.repeat(1, 3, 1, 1)
targets = targets.repeat(1, 3, 1, 1)
inputs = (inputs-self.mean) / self.std
targets = (targets-self.mean) / self.std
if self.resize:
inputs = self.transform(inputs, mode='bilinear', size=(512, 512), align_corners=False)
targets = self.transform(targets, mode='bilinear', size=(512, 512), align_corners=False)
loss = 0.0
x = inputs
y = targets
for block in self.blocks:
x = block(x)
y = block(y)
loss += torch.nn.functional.l1_loss(x, y)
return loss
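# --- Hedged shape-check sketch (not part of the original repo) ------------------
# Quick sanity check of the three networks above on random tensors. The sizes are
# illustrative assumptions; PerceptualNet downloads VGG weights on first use.
if __name__ == '__main__':
    img = torch.rand(1, 3, 256, 256)
    mask = (torch.rand(1, 1, 256, 256) > 0.9).float()

    gen = GatedGenerator()
    first_out, second_out = gen(img, mask)
    print(first_out.shape, second_out.shape)      # both [1, 3, 256, 256]

    disc = NLayerDiscriminator(3, n_layers=3)
    print(disc(second_out).shape)                 # patch map, e.g. [1, 1, 30, 30]

    perc = PerceptualNet(name='vgg16', resize=False)
    print(perc(second_out, img).item())           # scalar perceptual loss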
import torch
import torch.nn as nn
from torch.nn import Parameter
import torch.nn.functional as F
import torch.utils.data as data
import functools
class conv_block(nn.Module):
"""
Convolution Block
"""
def __init__(self, in_ch, out_ch):
super(conv_block, self).__init__()
self.conv = nn.Sequential(
nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=1, padding=1, bias=True),
nn.BatchNorm2d(out_ch),
nn.ReLU(inplace=True),
nn.Conv2d(out_ch, out_ch, kernel_size=3, stride=1, padding=1, bias=True),
nn.BatchNorm2d(out_ch),
nn.ReLU(inplace=True))
def forward(self, x):
x = self.conv(x)
return x
class up_conv(nn.Module):
"""
Up Convolution Block
"""
def __init__(self, in_ch, out_ch):
super(up_conv, self).__init__()
self.up = nn.Sequential(
nn.Upsample(scale_factor=2),
nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=1, padding=1, bias=True),
nn.BatchNorm2d(out_ch),
nn.ReLU(inplace=True)
)
def forward(self, x):
x = self.up(x)
return x
class Recurrent_block(nn.Module):
"""
Recurrent Block for R2Unet_CNN
"""
def __init__(self, out_ch, t=2):
super(Recurrent_block, self).__init__()
self.t = t
self.out_ch = out_ch
self.conv = nn.Sequential(
nn.Conv2d(out_ch, out_ch, kernel_size=3, stride=1, padding=1, bias=True),
nn.BatchNorm2d(out_ch),
nn.ReLU(inplace=True)
)
    def forward(self, x):
        # Recurrent convolution: feed the running activation back in with the input.
        for i in range(self.t):
            if i == 0:
                x1 = self.conv(x)
            x1 = self.conv(x + x1)
        return x1
class RRCNN_block(nn.Module):
"""
Recurrent Residual Convolutional Neural Network Block
"""
def __init__(self, in_ch, out_ch, t=2):
super(RRCNN_block, self).__init__()
self.RCNN = nn.Sequential(
Recurrent_block(out_ch, t=t),
Recurrent_block(out_ch, t=t)
)
self.Conv = nn.Conv2d(in_ch, out_ch, kernel_size=1, stride=1, padding=0)
def forward(self, x):
x1 = self.Conv(x)
x2 = self.RCNN(x1)
out = x1 + x2
return out
class Attention_block(nn.Module):
"""
Attention Block
"""
def __init__(self, F_g, F_l, F_int):
super(Attention_block, self).__init__()
self.W_g = nn.Sequential(
nn.Conv2d(F_l, F_int, kernel_size=1, stride=1, padding=0, bias=True),
nn.BatchNorm2d(F_int)
)
self.W_x = nn.Sequential(
nn.Conv2d(F_g, F_int, kernel_size=1, stride=1, padding=0, bias=True),
nn.BatchNorm2d(F_int)
)
self.psi = nn.Sequential(
nn.Conv2d(F_int, 1, kernel_size=1, stride=1, padding=0, bias=True),
nn.BatchNorm2d(1),
nn.Sigmoid()
)
self.relu = nn.ReLU(inplace=True)
def forward(self, g, x):
g1 = self.W_g(g)
x1 = self.W_x(x)
psi = self.relu(g1 + x1)
psi = self.psi(psi)
out = x * psi
return out
class SE_Block(nn.Module):
"credits: https://github.com/moskomule/senet.pytorch/blob/master/senet/se_module.py#L4"
def __init__(self, c, r=16):
super().__init__()
self.squeeze = nn.AdaptiveAvgPool2d(1)
self.excitation = nn.Sequential(
nn.Linear(c, c // r, bias=False),
nn.ReLU(inplace=True),
nn.Linear(c // r, c, bias=False),
nn.Sigmoid()
)
def forward(self, x):
bs, c, _, _ = x.shape
y = self.squeeze(x).view(bs, c)
y = self.excitation(y).view(bs, c, 1, 1)
return x * y.expand_as(x)
class AtrousConv(nn.Module):
def __init__(self, in_ch):
super().__init__()
self.atrous_conv = nn.Sequential(
nn.Conv2d(in_ch, in_ch, kernel_size=3, stride=1, dilation=2, padding=2),
nn.BatchNorm2d(in_ch),
nn.ReLU(),
nn.Conv2d(in_ch, in_ch, kernel_size=3, stride=1, dilation=4, padding=4),
nn.BatchNorm2d(in_ch),
nn.ReLU(),
nn.Conv2d(in_ch, in_ch, kernel_size=3, stride=1, dilation=8, padding=8),
nn.BatchNorm2d(in_ch),
nn.ReLU(),
nn.Conv2d(in_ch, in_ch, kernel_size=3, stride=1, dilation=16, padding=16),
nn.BatchNorm2d(in_ch),
nn.ReLU(),
)
def forward(self, x):
return self.atrous_conv(x)
class UNetSemantic(nn.Module):
"""
UNet - Basic Implementation
Paper : https://arxiv.org/abs/1505.04597
"""
def __init__(self, in_ch=3, out_ch=1):
super(UNetSemantic, self).__init__()
n1 = 32
filters = [n1, n1 * 2, n1 * 4, n1 * 8, n1 * 16]
self.Maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)
self.Maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)
self.Maxpool3 = nn.MaxPool2d(kernel_size=2, stride=2)
self.Maxpool4 = nn.MaxPool2d(kernel_size=2, stride=2)
self.Conv1 = conv_block(in_ch, filters[0])
self.Conv2 = conv_block(filters[0], filters[1])
self.Conv3 = conv_block(filters[1], filters[2])
self.Conv4 = conv_block(filters[2], filters[3])
self.Conv5 = conv_block(filters[3], filters[4])
self.Up5 = up_conv(filters[4], filters[3])
self.Up_conv5 = conv_block(filters[4], filters[3])
self.Up4 = up_conv(filters[3], filters[2])
self.Up_conv4 = conv_block(filters[3], filters[2])
self.Up3 = up_conv(filters[2], filters[1])
self.Up_conv3 = conv_block(filters[2], filters[1])
self.Up2 = up_conv(filters[1], filters[0])
self.Up_conv2 = conv_block(filters[1], filters[0])
self.Conv = nn.Conv2d(filters[0], out_ch, kernel_size=1, stride=1, padding=0)
self.se1 = SE_Block(filters[0])
self.se2 = SE_Block(filters[1])
self.se3 = SE_Block(filters[2])
self.active = torch.nn.Sigmoid()
def forward(self, x):
e1 = self.Conv1(x)
e1 = self.se1(e1)
e2 = self.Maxpool1(e1)
e2 = self.Conv2(e2)
e2 = self.se2(e2)
e3 = self.Maxpool2(e2)
e3 = self.Conv3(e3)
e3 = self.se3(e3)
e4 = self.Maxpool3(e3)
e4 = self.Conv4(e4)
e5 = self.Maxpool4(e4)
e5 = self.Conv5(e5)
d5 = self.Up5(e5)
d5 = torch.cat((e4, d5), dim=1)
d5 = self.Up_conv5(d5)
d4 = self.Up4(d5)
d4 = torch.cat((e3, d4), dim=1)
d4 = self.Up_conv4(d4)
d3 = self.Up3(d4)
d3 = torch.cat((e2, d3), dim=1)
d3 = self.Up_conv3(d3)
d2 = self.Up2(d3)
d2 = torch.cat((e1, d2), dim=1)
d2 = self.Up_conv2(d2)
out = self.Conv(d2)
out = self.active(out)
return out
\ No newline at end of file
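# --- Hedged shape-check sketch (not part of the original repo) ------------------
# UNetSemantic maps a 3-channel image to a 1-channel sigmoid mask of the same
# spatial size (the input side must be divisible by 16 because of the four
# max-pool stages). The size below is an illustrative assumption.
if __name__ == '__main__':
    net = UNetSemantic()
    x = torch.rand(1, 3, 512, 512)
    y = net(x)
    print(y.shape, float(y.min()), float(y.max()))   # [1, 1, 512, 512], values in (0, 1)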
import argparse
from configs import Config
from trainer import Trainer
from unet_trainer import UNetTrainer
def main(args, cfg):
if args.config == "segm":
trainer = UNetTrainer(args, cfg)
else:
trainer = Trainer(args, cfg)
trainer.fit()
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Training custom model")
parser.add_argument("--resume", default=None, type=str, help="resume training")
    parser.add_argument("config", default="config", type=str, help="config file name under ./configs (without .yaml)")
args = parser.parse_args()
config = Config(f"./configs/{args.config}.yaml")
main(args, config)
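# --- Hedged usage note (not part of the original repo) --------------------------
# The positional `config` argument selects ./configs/<name>.yaml and also decides
# which trainer runs: the literal name "segm" starts the U-Net segmentation
# trainer, anything else the GAN inpainting trainer, e.g.
#
#     python train.py segm                                  # face-mask segmentation
#     python train.py facemask --resume weights/model_5_50000.pth
#
# The YAML file names and the checkpoint path above are assumptions for
# illustration; they must match files that actually exist in ./configs and the
# checkpoint naming used by the trainers.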
import os
import cv2
import time
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.utils.data as data
from torch.optim.lr_scheduler import StepLR
from torchvision.utils import save_image
from models import *
from losses import *
from datasets import Places365Dataset, FacemaskDataset
def adjust_learning_rate(optimizer, gamma, num_steps=1):
for i in range(num_steps):
for param_group in optimizer.param_groups:
param_group['lr'] *= gamma
def get_epoch_iters(path):
path = os.path.basename(path)
tokens = path[:-4].split('_')
try:
if tokens[-1] == 'interrupted':
epoch_idx = int(tokens[-3])
iter_idx = int(tokens[-2])
else:
epoch_idx = int(tokens[-2])
iter_idx = int(tokens[-1])
except:
return 0, 0
return epoch_idx, iter_idx
def load_checkpoint(model_G, model_D, path):
state = torch.load(path,map_location='cpu')
model_G.load_state_dict(state['G'])
model_D.load_state_dict(state['D'])
print('Loaded checkpoint successfully')
class Trainer():
def __init__(self, args, cfg):
if args.resume is not None:
epoch, iters = get_epoch_iters(args.resume)
else:
epoch = 0
iters = 0
if not os.path.exists(cfg.checkpoint_path):
os.makedirs(cfg.checkpoint_path)
if not os.path.exists(cfg.sample_folder):
os.makedirs(cfg.sample_folder)
self.cfg = cfg
self.step_iters = cfg.step_iters
self.gamma = cfg.gamma
self.visualize_per_iter = cfg.visualize_per_iter
self.print_per_iter = cfg.print_per_iter
self.save_per_iter = cfg.save_per_iter
self.start_iter = iters
self.iters = 0
self.num_epochs = cfg.num_epochs
self.device = torch.device('cuda' if cfg.cuda else 'cpu')
trainset = FacemaskDataset(cfg) # Places365Dataset(cfg) #
self.trainloader = data.DataLoader(
trainset,
batch_size=cfg.batch_size,
num_workers = cfg.num_workers,
pin_memory = True,
shuffle=True,
collate_fn = trainset.collate_fn)
self.epoch = int(self.start_iter / len(self.trainloader))
self.iters = self.start_iter
self.num_iters = (self.num_epochs+1) * len(self.trainloader)
self.model_G = GatedGenerator().to(self.device)
        self.model_D = NLayerDiscriminator(3, n_layers=cfg.d_num_layers, use_sigmoid=False).to(self.device)
self.model_P = PerceptualNet(name = "vgg16", resize=False).to(self.device)
if args.resume is not None:
load_checkpoint(self.model_G, self.model_D, args.resume)
self.criterion_adv = GANLoss(target_real_label=0.9, target_fake_label=0.1)
self.criterion_rec = nn.SmoothL1Loss()
self.criterion_ssim = SSIM(window_size = 11)
self.criterion_per = nn.SmoothL1Loss()
self.optimizer_D = torch.optim.Adam(self.model_D.parameters(), lr=cfg.lr)
self.optimizer_G = torch.optim.Adam(self.model_G.parameters(), lr=cfg.lr)
def validate(self, sample_folder, sample_name, img_list):
save_img_path = os.path.join(sample_folder, sample_name+'.png')
img_list = [i.clone().cpu() for i in img_list]
imgs = torch.stack(img_list, dim=1)
# imgs shape: Bx5xCxWxH
imgs = imgs.view(-1, *list(imgs.size())[2:])
save_image(imgs, save_img_path, nrow= 5)
print(f"Save image to {save_img_path}")
def fit(self):
self.model_G.train()
self.model_D.train()
running_loss = {
'D': 0,
'G': 0,
'P': 0,
'R_1': 0,
'R_2': 0,
'T': 0,
}
running_time = 0
step = 0
try:
for epoch in range(self.epoch, self.num_epochs):
self.epoch = epoch
for i, batch in enumerate(self.trainloader):
start_time = time.time()
imgs = batch['imgs'].to(self.device)
masks = batch['masks'].to(self.device)
# Train discriminator
self.optimizer_D.zero_grad()
self.optimizer_G.zero_grad()
first_out, second_out = self.model_G(imgs, masks)
first_out_wholeimg = imgs * (1 - masks) + first_out * masks
second_out_wholeimg = imgs * (1 - masks) + second_out * masks
masks = masks.cpu()
fake_D = self.model_D(second_out_wholeimg.detach())
real_D = self.model_D(imgs)
loss_fake_D = self.criterion_adv(fake_D, target_is_real=False)
loss_real_D = self.criterion_adv(real_D, target_is_real=True)
loss_D = (loss_fake_D + loss_real_D) * 0.5
loss_D.backward()
self.optimizer_D.step()
real_D = None
# Train Generator
self.optimizer_D.zero_grad()
self.optimizer_G.zero_grad()
fake_D = self.model_D(second_out_wholeimg)
loss_G = self.criterion_adv(fake_D, target_is_real=True)
fake_D = None
# Reconstruction loss
loss_l1_1 = self.criterion_rec(first_out_wholeimg, imgs)
loss_l1_2 = self.criterion_rec(second_out_wholeimg, imgs)
loss_ssim_1 = self.criterion_ssim(first_out_wholeimg, imgs)
loss_ssim_2 = self.criterion_ssim(second_out_wholeimg, imgs)
loss_rec_1 = 0.5 * loss_l1_1 + 0.5 * (1 - loss_ssim_1)
loss_rec_2 = 0.5 * loss_l1_2 + 0.5 * (1 - loss_ssim_2)
# Perceptual loss
loss_P = self.model_P(second_out_wholeimg, imgs)
loss = self.cfg.lambda_G * loss_G + self.cfg.lambda_rec_1 * loss_rec_1 + self.cfg.lambda_rec_2 * loss_rec_2 + self.cfg.lambda_per * loss_P
loss.backward()
self.optimizer_G.step()
end_time = time.time()
imgs = imgs.cpu()
# Visualize number
running_time += (end_time - start_time)
running_loss['D'] += loss_D.item()
running_loss['G'] += (self.cfg.lambda_G * loss_G.item())
running_loss['P'] += (self.cfg.lambda_per * loss_P.item())
running_loss['R_1'] += (self.cfg.lambda_rec_1 * loss_rec_1.item())
running_loss['R_2'] += (self.cfg.lambda_rec_2 * loss_rec_2.item())
running_loss['T'] += loss.item()
if self.iters % self.print_per_iter == 0:
for key in running_loss.keys():
running_loss[key] /= self.print_per_iter
running_loss[key] = np.round(running_loss[key], 5)
loss_string = '{}'.format(running_loss)[1:-1].replace("'",'').replace(",",' ||')
print("[{}|{}] [{}|{}] || {} || Time: {:10.4f}s".format(self.epoch, self.num_epochs, self.iters, self.num_iters, loss_string, running_time))
running_loss = {
'D': 0,
'G': 0,
'P': 0,
'R_1': 0,
'R_2': 0,
'T': 0,
}
running_time = 0
if self.iters % self.save_per_iter == 0:
torch.save({
'D': self.model_D.state_dict(),
'G': self.model_G.state_dict(),
}, os.path.join(self.cfg.checkpoint_path, f"model_{self.epoch}_{self.iters}.pth"))
# Step learning rate
                    if step < len(self.step_iters) and self.iters == self.step_iters[step]:
adjust_learning_rate(self.optimizer_D, self.gamma)
adjust_learning_rate(self.optimizer_G, self.gamma)
step+=1
# Visualize sample
if self.iters % self.visualize_per_iter == 0:
masked_imgs = imgs * (1 - masks) + masks
img_list = [imgs, masked_imgs, first_out, second_out, second_out_wholeimg]
#name_list = ['gt', 'mask', 'masked_img', 'first_out', 'second_out']
filename = f"{self.epoch}_{str(self.iters)}"
self.validate(self.cfg.sample_folder, filename , img_list)
self.iters += 1
except KeyboardInterrupt:
torch.save({
'D': self.model_D.state_dict(),
'G': self.model_G.state_dict(),
}, os.path.join(self.cfg.checkpoint_path, f"model_{self.epoch}_{self.iters}.pth"))
\ No newline at end of file
import os
import cv2
import time
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.utils.data as data
from torch.optim.lr_scheduler import StepLR
from torchvision.utils import save_image
from models import UNetSemantic
from losses import DiceLoss
from datasets import FacemaskSegDataset
from metrics import *
def adjust_learning_rate(optimizer, gamma, num_steps=1):
for i in range(num_steps):
for param_group in optimizer.param_groups:
param_group["lr"] *= gamma
def get_epoch_iters(path):
path = os.path.basename(path)
tokens = path[:-4].split("_")
try:
if tokens[-1] == "interrupted":
epoch_idx = int(tokens[-3])
iter_idx = int(tokens[-2])
else:
epoch_idx = int(tokens[-2])
iter_idx = int(tokens[-1])
except:
return 0, 0
return epoch_idx, iter_idx
def load_checkpoint(model, path):
state = torch.load(path, map_location="cpu")
model.load_state_dict(state)
print("Loaded checkpoint successfully")
class UNetTrainer:
def __init__(self, args, cfg):
if args.resume is not None:
epoch, iters = get_epoch_iters(args.resume)
else:
epoch = 0
iters = 0
        self.cfg = cfg
        # Make sure the checkpoint directory exists before the first torch.save().
        if not os.path.exists(cfg.checkpoint_path):
            os.makedirs(cfg.checkpoint_path)
self.step_iters = cfg.step_iters
self.gamma = cfg.gamma
self.visualize_per_iter = cfg.visualize_per_iter
self.print_per_iter = cfg.print_per_iter
self.save_per_iter = cfg.save_per_iter
self.start_iter = iters
self.iters = 0
self.num_epochs = cfg.num_epochs
self.device = torch.device("cuda:0" if cfg.cuda else "cpu")
trainset = FacemaskSegDataset(cfg)
valset = FacemaskSegDataset(cfg, train=False)
self.trainloader = data.DataLoader(
trainset,
batch_size=cfg.batch_size,
num_workers=cfg.num_workers,
pin_memory=True,
shuffle=True,
collate_fn=trainset.collate_fn,
)
self.valloader = data.DataLoader(
valset,
batch_size=cfg.batch_size,
num_workers=cfg.num_workers,
pin_memory=True,
shuffle=True,
collate_fn=valset.collate_fn,
)
self.epoch = int(self.start_iter / len(self.trainloader))
self.iters = self.start_iter
self.num_iters = (self.num_epochs + 1) * len(self.trainloader)
self.model = UNetSemantic().to(self.device)
self.criterion_dice = DiceLoss()
self.criterion_bce = nn.BCELoss()
if args.resume is not None:
load_checkpoint(self.model, args.resume)
self.optimizer = torch.optim.Adam(self.model.parameters(), lr=cfg.lr)
def validate(self, sample_folder, sample_name, img_list):
save_img_path = os.path.join(sample_folder, sample_name + ".png")
img_list = [i.clone().cpu() for i in img_list]
imgs = torch.stack(img_list, dim=1)
# imgs shape: Bx5xCxWxH
imgs = imgs.view(-1, *list(imgs.size())[2:])
save_image(imgs, save_img_path, nrow=3)
print(f"Save image to {save_img_path}")
def train_epoch(self):
self.model.train()
running_loss = {
"DICE": 0,
"BCE": 0,
"T": 0,
}
running_time = 0
for idx, batch in enumerate(self.trainloader):
self.optimizer.zero_grad()
inputs = batch["imgs"].to(self.device)
targets = batch["masks"].to(self.device)
start_time = time.time()
outputs = self.model(inputs)
loss_bce = self.criterion_bce(outputs, targets)
loss_dice = self.criterion_dice(outputs, targets)
loss = loss_bce + loss_dice
loss.backward()
self.optimizer.step()
end_time = time.time()
running_loss["T"] += loss.item()
running_loss["DICE"] += loss_dice.item()
running_loss["BCE"] += loss_bce.item()
running_time += end_time - start_time
if self.iters % self.print_per_iter == 0:
for key in running_loss.keys():
running_loss[key] /= self.print_per_iter
running_loss[key] = np.round(running_loss[key], 5)
loss_string = (
"{}".format(running_loss)[1:-1].replace("'", "").replace(",", " ||")
)
running_time = np.round(running_time, 5)
print(
"[{}/{}][{}/{}] || {} || Time: {}s".format(
self.epoch,
self.num_epochs,
self.iters,
self.num_iters,
loss_string,
running_time,
)
)
running_time = 0
running_loss = {
"DICE": 0,
"BCE": 0,
"T": 0,
}
if self.iters % self.save_per_iter == 0:
save_path = os.path.join(
self.cfg.checkpoint_path,
f"model_segm_{self.epoch}_{self.iters}.pth",
)
torch.save(self.model.state_dict(), save_path)
print(f"Save model at {save_path}")
self.iters += 1
def validate_epoch(self):
# Validate
self.model.eval()
metrics = [DiceScore(1), PixelAccuracy(1)]
running_loss = {
"DICE": 0,
"BCE": 0,
"T": 0,
}
running_time = 0
print(
"=============================EVALUATION==================================="
)
with torch.no_grad():
start_time = time.time()
for idx, batch in enumerate(tqdm(self.valloader)):
inputs = batch["imgs"].to(self.device)
targets = batch["masks"].to(self.device)
outputs = self.model(inputs)
loss_bce = self.criterion_bce(outputs, targets)
loss_dice = self.criterion_dice(outputs, targets)
loss = loss_bce + loss_dice
running_loss["T"] += loss.item()
running_loss["DICE"] += loss_dice.item()
running_loss["BCE"] += loss_bce.item()
for metric in metrics:
metric.update(outputs.cpu(), targets.cpu())
end_time = time.time()
running_time += end_time - start_time
running_time = np.round(running_time, 5)
for key in running_loss.keys():
running_loss[key] /= len(self.valloader)
running_loss[key] = np.round(running_loss[key], 5)
loss_string = (
"{}".format(running_loss)[1:-1].replace("'", "").replace(",", " ||")
)
print(
"[{}/{}] || Validation || {} || Time: {}s".format(
self.epoch, self.num_epochs, loss_string, running_time
)
)
for metric in metrics:
print(metric)
print(
"=========================================================================="
)
def fit(self):
try:
for epoch in range(self.epoch, self.num_epochs + 1):
self.epoch = epoch
self.train_epoch()
self.validate_epoch()
except KeyboardInterrupt:
torch.save(
self.model.state_dict(),
os.path.join(
self.cfg.checkpoint_path,
f"model_segm_{self.epoch}_{self.iters}.pth",
),
)
print("Model saved!")
# Repo-specific
data/masks/*
.vscode*
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
backup*
pexels_royalty_free_photos*
from keras.utils import conv_utils
from keras import backend as K
from keras.engine import InputSpec
from keras.layers import Conv2D
class PConv2D(Conv2D):
def __init__(self, *args, n_channels=3, mono=False, **kwargs):
super().__init__(*args, **kwargs)
self.input_spec = [InputSpec(ndim=4), InputSpec(ndim=4)]
def build(self, input_shape):
"""Adapted from original _Conv() layer of Keras
param input_shape: list of dimensions for [img, mask]
"""
if self.data_format == 'channels_first':
channel_axis = 1
else:
channel_axis = -1
if input_shape[0][channel_axis] is None:
raise ValueError('The channel dimension of the inputs should be defined. Found `None`.')
self.input_dim = input_shape[0][channel_axis]
# Image kernel
kernel_shape = self.kernel_size + (self.input_dim, self.filters)
self.kernel = self.add_weight(shape=kernel_shape,
initializer=self.kernel_initializer,
name='img_kernel',
regularizer=self.kernel_regularizer,
constraint=self.kernel_constraint)
# Mask kernel
self.kernel_mask = K.ones(shape=self.kernel_size + (self.input_dim, self.filters))
# Calculate padding size to achieve zero-padding
self.pconv_padding = (
    (int((self.kernel_size[0]-1)/2), int((self.kernel_size[0]-1)/2)),
    (int((self.kernel_size[1]-1)/2), int((self.kernel_size[1]-1)/2)),
)
# Window size - used for normalization
self.window_size = self.kernel_size[0] * self.kernel_size[1]
if self.use_bias:
self.bias = self.add_weight(shape=(self.filters,),
initializer=self.bias_initializer,
name='bias',
regularizer=self.bias_regularizer,
constraint=self.bias_constraint)
else:
self.bias = None
self.built = True
def call(self, inputs, mask=None):
'''
We will be using the Keras conv2d method; essentially all we have
to do here is multiply the input X by the mask before applying the
convolution. For the mask itself, we apply a convolution with all weights
set to 1.
Subsequently, we clip the mask values to between 0 and 1.
'''
# Both image and mask must be supplied
if type(inputs) is not list or len(inputs) != 2:
raise Exception('PartialConvolution2D must be called on a list of two tensors [img, mask]. Instead got: ' + str(inputs))
# Padding done explicitly so that padding becomes part of the masked partial convolution
images = K.spatial_2d_padding(inputs[0], self.pconv_padding, self.data_format)
masks = K.spatial_2d_padding(inputs[1], self.pconv_padding, self.data_format)
# Apply convolutions to mask
mask_output = K.conv2d(
masks, self.kernel_mask,
strides=self.strides,
padding='valid',
data_format=self.data_format,
dilation_rate=self.dilation_rate
)
# Apply convolutions to image
img_output = K.conv2d(
(images*masks), self.kernel,
strides=self.strides,
padding='valid',
data_format=self.data_format,
dilation_rate=self.dilation_rate
)
# Calculate the mask ratio on each pixel in the output mask
mask_ratio = self.window_size / (mask_output + 1e-8)
# Clip output to be between 0 and 1
mask_output = K.clip(mask_output, 0, 1)
# Remove ratio values where there are holes
mask_ratio = mask_ratio * mask_output
# Normalize image output
img_output = img_output * mask_ratio
# Apply bias only to the image (if chosen to do so)
if self.use_bias:
img_output = K.bias_add(
img_output,
self.bias,
data_format=self.data_format)
# Apply activations on the image
if self.activation is not None:
img_output = self.activation(img_output)
return [img_output, mask_output]
def compute_output_shape(self, input_shape):
if self.data_format == 'channels_last':
space = input_shape[0][1:-1]
new_space = []
for i in range(len(space)):
new_dim = conv_utils.conv_output_length(
space[i],
self.kernel_size[i],
padding='same',
stride=self.strides[i],
dilation=self.dilation_rate[i])
new_space.append(new_dim)
new_shape = (input_shape[0][0],) + tuple(new_space) + (self.filters,)
return [new_shape, new_shape]
if self.data_format == 'channels_first':
space = input_shape[2:]
new_space = []
for i in range(len(space)):
new_dim = conv_utils.conv_output_length(
space[i],
self.kernel_size[i],
padding='same',
stride=self.strides[i],
dilation=self.dilation_rate[i])
new_space.append(new_dim)
new_shape = (input_shape[0], self.filters) + tuple(new_space)
return [new_shape, new_shape]
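# --- Illustrative usage sketch (not part of the original repository) ---------
# PConv2D is called on a list [img, mask]: the image is multiplied by the mask
# before the convolution, the mask is convolved with a fixed all-ones kernel,
# and the image response is rescaled by window_size / sum(mask) so that windows
# covering only a few valid pixels keep a comparable magnitude; the mask output
# is clipped to [0, 1] and shrinks the hole layer by layer. The shapes and
# filter counts below are arbitrary choices for demonstration.
if __name__ == "__main__":
    from keras.models import Model
    from keras.layers import Input

    img_in = Input(shape=(128, 128, 3))
    mask_in = Input(shape=(128, 128, 3))
    conv, mask = PConv2D(32, kernel_size=3, strides=2, padding='same')([img_in, mask_in])
    demo = Model(inputs=[img_in, mask_in], outputs=[conv, mask])
    demo.summary()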
import os
import sys
import numpy as np
from datetime import datetime
import tensorflow as tf
from keras.models import Model
from keras.models import load_model
from keras.optimizers import Adam
from keras.layers import Input, Conv2D, UpSampling2D, Dropout, LeakyReLU, BatchNormalization, Activation, Lambda
from keras.layers.merge import Concatenate
from keras.applications import VGG16
from keras import backend as K
from keras.utils.multi_gpu_utils import multi_gpu_model
from libs.pconv_layer import PConv2D
class PConvUnet(object):
def __init__(self, img_rows=512, img_cols=512, vgg_weights="imagenet", inference_only=False, net_name='default', gpus=1, vgg_device=None):
"""Create the PConvUnet. If variable image size, set img_rows and img_cols to None
Args:
img_rows (int): image height.
img_cols (int): image width.
vgg_weights (str): which weights to pass to the vgg network.
inference_only (bool): initialize BN layers for inference.
net_name (str): Name of this network (used in logging).
gpus (int): How many GPUs to use for training.
vgg_device (str): In case of training with multiple GPUs, specify which device to run VGG inference on.
e.g. if training on 8 GPUs, vgg inference could be off-loaded exclusively to one GPU, instead of
running on one of the GPUs which is also training the UNet.
"""
# Settings
self.img_rows = img_rows
self.img_cols = img_cols
self.img_overlap = 30
self.inference_only = inference_only
self.net_name = net_name
self.gpus = gpus
self.vgg_device = vgg_device
# Scaling for VGG input
self.mean = [0.485, 0.456, 0.406]
self.std = [0.229, 0.224, 0.225]
# Assertions
assert self.img_rows >= 256, 'Height must be at least 256 pixels'
assert self.img_cols >= 256, 'Width must be at least 256 pixels'
# Set current epoch
self.current_epoch = 0
# VGG layers to extract features from (first maxpooling layers, see pp. 7 of paper)
self.vgg_layers = [3, 6, 10]
# Instantiate the vgg network
if self.vgg_device:
with tf.device(self.vgg_device):
self.vgg = self.build_vgg(vgg_weights)
else:
self.vgg = self.build_vgg(vgg_weights)
# Create UNet-like model
if self.gpus <= 1:
self.model, inputs_mask = self.build_pconv_unet()
self.compile_pconv_unet(self.model, inputs_mask)
else:
with tf.device("/cpu:0"):
self.model, inputs_mask = self.build_pconv_unet()
self.model = multi_gpu_model(self.model, gpus=self.gpus)
self.compile_pconv_unet(self.model, inputs_mask)
def build_vgg(self, weights="imagenet"):
"""
Load pre-trained VGG16 from keras applications
Extract features to be used in the loss function from the first three pooling layers; see architecture at:
https://github.com/keras-team/keras/blob/master/keras/applications/vgg16.py
"""
# Input image to extract features from
img = Input(shape=(self.img_rows, self.img_cols, 3))
# Mean center and rescale by the standard deviation, as in PyTorch
processed = Lambda(lambda x: (x-self.mean) / self.std)(img)
# If inference only, just return empty model
if self.inference_only:
model = Model(inputs=img, outputs=[img for _ in range(len(self.vgg_layers))])
model.trainable = False
model.compile(loss='mse', optimizer='adam')
return model
# Get the vgg network from Keras applications
if weights in ['imagenet', None]:
vgg = VGG16(weights=weights, include_top=False)
else:
vgg = VGG16(weights=None, include_top=False)
vgg.load_weights(weights, by_name=True)
# Output the first three pooling layers
vgg.outputs = [vgg.layers[i].output for i in self.vgg_layers]
# Create model and compile
model = Model(inputs=img, outputs=vgg(processed))
model.trainable = False
model.compile(loss='mse', optimizer='adam')
return model
def build_pconv_unet(self, train_bn=True):
# INPUTS
inputs_img = Input((self.img_rows, self.img_cols, 3), name='inputs_img')
inputs_mask = Input((self.img_rows, self.img_cols, 3), name='inputs_mask')
# ENCODER
def encoder_layer(img_in, mask_in, filters, kernel_size, bn=True):
conv, mask = PConv2D(filters, kernel_size, strides=2, padding='same')([img_in, mask_in])
if bn:
conv = BatchNormalization(name='EncBN'+str(encoder_layer.counter))(conv, training=train_bn)
conv = Activation('relu')(conv)
encoder_layer.counter += 1
return conv, mask
encoder_layer.counter = 0
e_conv1, e_mask1 = encoder_layer(inputs_img, inputs_mask, 64, 7, bn=False)
e_conv2, e_mask2 = encoder_layer(e_conv1, e_mask1, 128, 5)
e_conv3, e_mask3 = encoder_layer(e_conv2, e_mask2, 256, 5)
e_conv4, e_mask4 = encoder_layer(e_conv3, e_mask3, 512, 3)
e_conv5, e_mask5 = encoder_layer(e_conv4, e_mask4, 512, 3)
e_conv6, e_mask6 = encoder_layer(e_conv5, e_mask5, 512, 3)
e_conv7, e_mask7 = encoder_layer(e_conv6, e_mask6, 512, 3)
e_conv8, e_mask8 = encoder_layer(e_conv7, e_mask7, 512, 3)
# DECODER
def decoder_layer(img_in, mask_in, e_conv, e_mask, filters, kernel_size, bn=True):
up_img = UpSampling2D(size=(2,2))(img_in)
up_mask = UpSampling2D(size=(2,2))(mask_in)
concat_img = Concatenate(axis=3)([e_conv,up_img])
concat_mask = Concatenate(axis=3)([e_mask,up_mask])
conv, mask = PConv2D(filters, kernel_size, padding='same')([concat_img, concat_mask])
if bn:
conv = BatchNormalization()(conv)
conv = LeakyReLU(alpha=0.2)(conv)
return conv, mask
d_conv9, d_mask9 = decoder_layer(e_conv8, e_mask8, e_conv7, e_mask7, 512, 3)
d_conv10, d_mask10 = decoder_layer(d_conv9, d_mask9, e_conv6, e_mask6, 512, 3)
d_conv11, d_mask11 = decoder_layer(d_conv10, d_mask10, e_conv5, e_mask5, 512, 3)
d_conv12, d_mask12 = decoder_layer(d_conv11, d_mask11, e_conv4, e_mask4, 512, 3)
d_conv13, d_mask13 = decoder_layer(d_conv12, d_mask12, e_conv3, e_mask3, 256, 3)
d_conv14, d_mask14 = decoder_layer(d_conv13, d_mask13, e_conv2, e_mask2, 128, 3)
d_conv15, d_mask15 = decoder_layer(d_conv14, d_mask14, e_conv1, e_mask1, 64, 3)
d_conv16, d_mask16 = decoder_layer(d_conv15, d_mask15, inputs_img, inputs_mask, 3, 3, bn=False)
outputs = Conv2D(3, 1, activation = 'sigmoid', name='outputs_img')(d_conv16)
# Setup the model inputs / outputs
model = Model(inputs=[inputs_img, inputs_mask], outputs=outputs)
return model, inputs_mask
def compile_pconv_unet(self, model, inputs_mask, lr=0.0002):
model.compile(
optimizer = Adam(lr=lr),
loss=self.loss_total(inputs_mask),
metrics=[self.PSNR]
)
def loss_total(self, mask):
"""
Creates a loss function which sums all the loss components
and multiplies by their weights. See paper eq. 7.
"""
def loss(y_true, y_pred):
# Compute predicted image with non-hole pixels set to ground truth
y_comp = mask * y_true + (1-mask) * y_pred
# Compute the vgg features.
if self.vgg_device:
with tf.device(self.vgg_device):
vgg_out = self.vgg(y_pred)
vgg_gt = self.vgg(y_true)
vgg_comp = self.vgg(y_comp)
else:
vgg_out = self.vgg(y_pred)
vgg_gt = self.vgg(y_true)
vgg_comp = self.vgg(y_comp)
# Compute loss components
l1 = self.loss_valid(mask, y_true, y_pred)
l2 = self.loss_hole(mask, y_true, y_pred)
l3 = self.loss_perceptual(vgg_out, vgg_gt, vgg_comp)
l4 = self.loss_style(vgg_out, vgg_gt)
l5 = self.loss_style(vgg_comp, vgg_gt)
l6 = self.loss_tv(mask, y_comp)
# Return loss function
return l1 + 6*l2 + 0.05*l3 + 120*(l4+l5) + 0.1*l6
return loss
def loss_hole(self, mask, y_true, y_pred):
"""Pixel L1 loss within the hole / mask"""
return self.l1((1-mask) * y_true, (1-mask) * y_pred)
def loss_valid(self, mask, y_true, y_pred):
"""Pixel L1 loss outside the hole / mask"""
return self.l1(mask * y_true, mask * y_pred)
def loss_perceptual(self, vgg_out, vgg_gt, vgg_comp):
"""Perceptual loss based on VGG16, see. eq. 3 in paper"""
loss = 0
for o, c, g in zip(vgg_out, vgg_comp, vgg_gt):
loss += self.l1(o, g) + self.l1(c, g)
return loss
def loss_style(self, output, vgg_gt):
"""Style loss based on output/computation, used for both eq. 4 & 5 in paper"""
loss = 0
for o, g in zip(output, vgg_gt):
loss += self.l1(self.gram_matrix(o), self.gram_matrix(g))
return loss
def loss_tv(self, mask, y_comp):
"""Total variation loss, used for smoothing the hole region, see. eq. 6"""
# Create dilated hole region using a 3x3 kernel of all 1s.
kernel = K.ones(shape=(3, 3, mask.shape[3], mask.shape[3]))
dilated_mask = K.conv2d(1-mask, kernel, data_format='channels_last', padding='same')
# Cast values to be [0., 1.], and compute dilated hole region of y_comp
dilated_mask = K.cast(K.greater(dilated_mask, 0), 'float32')
P = dilated_mask * y_comp
# Calculate total variation loss
a = self.l1(P[:,1:,:,:], P[:,:-1,:,:])
b = self.l1(P[:,:,1:,:], P[:,:,:-1,:])
return a+b
def fit_generator(self, generator, *args, **kwargs):
"""Fit the U-Net to a (images, targets) generator
Args:
generator (generator): generator supplying input image & mask, as well as targets.
*args: arguments to be passed to fit_generator
**kwargs: keyword arguments to be passed to fit_generator
"""
self.model.fit_generator(
generator,
*args, **kwargs
)
def summary(self):
"""Get summary of the UNet model"""
print(self.model.summary())
def load(self, filepath, train_bn=True, lr=0.0002):
# Create UNet-like model
self.model, inputs_mask = self.build_pconv_unet(train_bn)
self.compile_pconv_unet(self.model, inputs_mask, lr)
# Load weights into model
epoch = int(os.path.basename(filepath).split('.')[1].split('-')[0])
assert epoch > 0, "Could not parse weight file. Should include the epoch"
self.current_epoch = epoch
self.model.load_weights(filepath)
@staticmethod
def PSNR(y_true, y_pred):
"""
PSNR is Peak Signal-to-Noise Ratio, see https://en.wikipedia.org/wiki/Peak_signal-to-noise_ratio
The equation is:
PSNR = 20 * log10(MAX_I) - 10 * log10(MSE)
Our input is scaled to be within the range -2.11 to 2.64 (ImageNet value scaling). We use the difference between these
two values (4.75) as MAX_I.
"""
#return 20 * K.log(4.75) / K.log(10.0) - 10.0 * K.log(K.mean(K.square(y_pred - y_true))) / K.log(10.0)
return - 10.0 * K.log(K.mean(K.square(y_pred - y_true))) / K.log(10.0)
@staticmethod
def current_timestamp():
return datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
@staticmethod
def l1(y_true, y_pred):
"""Calculate the L1 loss used in all loss calculations"""
if K.ndim(y_true) == 4:
return K.mean(K.abs(y_pred - y_true), axis=[1,2,3])
elif K.ndim(y_true) == 3:
return K.mean(K.abs(y_pred - y_true), axis=[1,2])
else:
raise NotImplementedError("Calculating L1 loss on 1D tensors? should not occur for this network")
@staticmethod
def gram_matrix(x, norm_by_channels=False):
"""Calculate gram matrix used in style loss"""
# Assertions on input
assert K.ndim(x) == 4, 'Input tensor should be a 4d (B, H, W, C) tensor'
assert K.image_data_format() == 'channels_last', "Please use channels-last format"
# Permute channels and get resulting shape
x = K.permute_dimensions(x, (0, 3, 1, 2))
shape = K.shape(x)
B, C, H, W = shape[0], shape[1], shape[2], shape[3]
# Reshape x and do batch dot product
features = K.reshape(x, K.stack([B, C, H*W]))
gram = K.batch_dot(features, features, axes=2)
# Normalize with channels, height and width
gram = gram / K.cast(C * H * W, x.dtype)
return gram
# Prediction functions
######################
def predict(self, sample, **kwargs):
"""Run prediction using this model"""
return self.model.predict(sample, **kwargs)
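# --- Illustrative usage sketch (not part of the original repository) ---------
# Inference follows the convention used in main.py: hole pixels of the input
# image are set to 1 and the binary mask (1 = valid, 0 = hole) is fed as the
# second input. The checkpoint path below is a placeholder, not a file shipped
# with the repository; with it commented out, the model runs with random
# weights, which is enough to check shapes.
if __name__ == "__main__":
    model = PConvUnet(vgg_weights=None, inference_only=True)
    # model.load("./data/logs/myDataset_phase1/weights.35-0.70.h5")  # hypothetical checkpoint

    ori = np.random.rand(1, 512, 512, 3).astype(np.float32)  # stand-in image in [0, 1]
    mask = np.ones((1, 512, 512, 3), dtype=np.float32)
    mask[:, 128:256, 128:256, :] = 0.0                        # a square hole as a toy mask
    masked = ori.copy()
    masked[mask == 0] = 1.0

    pred = model.predict([masked, mask])
    print(pred.shape)  # (1, 512, 512, 3)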
import os
from random import randint, seed
import itertools
import numpy as np
import cv2
class MaskGenerator():
def __init__(self, height, width, channels=3, rand_seed=None, filepath=None):
"""Convenience functions for generating masks to be used for inpainting training
Arguments:
height {int} -- Mask height
width {int} -- Mask width
Keyword Arguments:
channels {int} -- Channels to output (default: {3})
rand_seed {[type]} -- Random seed (default: {None})
filepath {[type]} -- Load masks from filepath. If None, generate masks with OpenCV (default: {None})
"""
self.height = height
self.width = width
self.channels = channels
self.filepath = filepath
# If filepath supplied, load the list of masks within the directory
self.mask_files = []
if self.filepath:
filenames = [f for f in os.listdir(self.filepath)]
self.mask_files = [f for f in filenames if any(filetype in f.lower() for filetype in ['.jpeg', '.png', '.jpg'])]
print(">> Found {} masks in {}".format(len(self.mask_files), self.filepath))
# Seed for reproducibility
if rand_seed:
seed(rand_seed)
def _generate_mask(self):
"""Generates a random irregular mask with lines, circles and elipses"""
img = np.zeros((self.height, self.width, self.channels), np.uint8)
# Set size scale
size = int((self.width + self.height) * 0.03)
if self.width < 64 or self.height < 64:
raise Exception("Width and Height of mask must be at least 64!")
# Draw random lines
for _ in range(randint(1, 20)):
x1, x2 = randint(1, self.width), randint(1, self.width)
y1, y2 = randint(1, self.height), randint(1, self.height)
thickness = randint(3, size)
cv2.line(img,(x1,y1),(x2,y2),(1,1,1),thickness)
# Draw random circles
for _ in range(randint(1, 20)):
x1, y1 = randint(1, self.width), randint(1, self.height)
radius = randint(3, size)
cv2.circle(img,(x1,y1),radius,(1,1,1), -1)
# Draw random ellipses
for _ in range(randint(1, 20)):
x1, y1 = randint(1, self.width), randint(1, self.height)
s1, s2 = randint(1, self.width), randint(1, self.height)
a1, a2, a3 = randint(3, 180), randint(3, 180), randint(3, 180)
thickness = randint(3, size)
cv2.ellipse(img, (x1,y1), (s1,s2), a1, a2, a3,(1,1,1), thickness)
return 1-img
def _load_mask(self, rotation=True, dilation=True, cropping=True):
"""Loads a mask from disk, and optionally augments it"""
# Read image
mask = cv2.imread(os.path.join(self.filepath, np.random.choice(self.mask_files, 1, replace=False)[0]))
# Random rotation
if rotation:
rand = np.random.randint(-180, 180)
M = cv2.getRotationMatrix2D((mask.shape[1]/2, mask.shape[0]/2), rand, 1.5)
mask = cv2.warpAffine(mask, M, (mask.shape[1], mask.shape[0]))
# Random erosion / dilation
if dilation:
rand = np.random.randint(5, 47)
kernel = np.ones((rand, rand), np.uint8)
mask = cv2.erode(mask, kernel, iterations=1)
# Random cropping
if cropping:
x = np.random.randint(0, mask.shape[1] - self.width)
y = np.random.randint(0, mask.shape[0] - self.height)
mask = mask[y:y+self.height, x:x+self.width]
return (mask > 1).astype(np.uint8)
def sample(self, random_seed=None):
"""Retrieve a random mask"""
if random_seed:
seed(random_seed)
if self.filepath and len(self.mask_files) > 0:
return self._load_mask()
else:
return self._generate_mask()
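# --- Illustrative usage sketch (not part of the original repository) ---------
# MaskGenerator.sample() returns a (height, width, channels) uint8 array with
# 1 for valid pixels and 0 inside the randomly drawn holes (lines, circles and
# ellipses). A quick, self-contained check:
if __name__ == "__main__":
    _gen = MaskGenerator(512, 512, 3, rand_seed=42)
    _mask = _gen.sample()
    print(_mask.shape, _mask.dtype, np.unique(_mask))  # (512, 512, 3) uint8 [0 1]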
class ImageChunker(object):
def __init__(self, rows, cols, overlap):
self.rows = rows
self.cols = cols
self.overlap = overlap
def perform_chunking(self, img_size, chunk_size):
"""
Given an image dimension img_size, return list of (start, stop)
tuples to perform chunking of chunk_size
"""
chunks, i = [], 0
while True:
chunks.append((i*(chunk_size - self.overlap/2), i*(chunk_size - self.overlap/2)+chunk_size))
i+=1
if chunks[-1][1] > img_size:
break
n_count = len(chunks)
chunks[-1] = tuple(x - (n_count*chunk_size - img_size - (n_count-1)*self.overlap/2) for x in chunks[-1])
chunks = [(int(x), int(y)) for x, y in chunks]
return chunks
def get_chunks(self, img, scale=1):
"""
Get width and height lists of (start, stop) tuples for chunking of img.
"""
x_chunks, y_chunks = [(0, self.rows)], [(0, self.cols)]
if img.shape[0] > self.rows:
x_chunks = self.perform_chunking(img.shape[0], self.rows)
else:
x_chunks = [(0, img.shape[0])]
if img.shape[1] > self.cols:
y_chunks = self.perform_chunking(img.shape[1], self.cols)
else:
y_chunks = [(0, img.shape[1])]
return x_chunks, y_chunks
def dimension_preprocess(self, img, padding=True):
"""
In case of prediction on image of different size than 512x512,
this function is used to add padding and chunk up the image into pieces
of 512x512, which can then later be reconstructed into the original image
using the dimension_postprocess() function.
"""
# Assert single image input
assert len(img.shape) == 3, "Image dimension expected to be (H, W, C)"
# Check if we are adding padding for too small images
if padding:
# Check if height is too small
if img.shape[0] < self.rows:
padding = np.ones((self.rows - img.shape[0], img.shape[1], img.shape[2]))
img = np.concatenate((img, padding), axis=0)
# Check if width is too small
if img.shape[1] < self.cols:
padding = np.ones((img.shape[0], self.cols - img.shape[1], img.shape[2]))
img = np.concatenate((img, padding), axis=1)
# Get chunking of the image
x_chunks, y_chunks = self.get_chunks(img)
# Chunk up the image
images = []
for x in x_chunks:
for y in y_chunks:
images.append(
img[x[0]:x[1], y[0]:y[1], :]
)
images = np.array(images)
return images
def dimension_postprocess(self, chunked_images, original_image, scale=1, padding=True):
"""
In case of prediction on image of different size than 512x512,
the dimension_preprocess function is used to add padding and chunk
up the image into pieces of 512x512, and this function is used to
reconstruct these pieces into the original image.
"""
# Assert input dimensions
assert len(original_image.shape) == 3, "Image dimension expected to be (H, W, C)"
assert len(chunked_images.shape) == 4, "Chunked images dimension expected to be (B, H, W, C)"
# Check if we are adding padding for too small images
if padding:
# Check if height is too small
if original_image.shape[0] < self.rows:
new_images = []
for img in chunked_images:
new_images.append(img[0:scale*original_image.shape[0], :, :])
chunked_images = np.array(new_images)
# Check if width is too small
if original_image.shape[1] < self.cols:
new_images = []
for img in chunked_images:
new_images.append(img[:, 0:scale*original_image.shape[1], :])
chunked_images = np.array(new_images)
# Put reconstruction into this array
new_shape = (
original_image.shape[0]*scale,
original_image.shape[1]*scale,
original_image.shape[2]
)
reconstruction = np.zeros(new_shape)
# Get the chunks for this image
x_chunks, y_chunks = self.get_chunks(original_image)
i = 0
s = scale
for x in x_chunks:
for y in y_chunks:
prior_fill = reconstruction != 0
chunk = np.zeros(new_shape)
chunk[x[0]*s:x[1]*s, y[0]*s:y[1]*s, :] += chunked_images[i]
chunk_fill = chunk != 0
reconstruction += chunk
reconstruction[prior_fill & chunk_fill] = reconstruction[prior_fill & chunk_fill] / 2
i += 1
return reconstruction
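# --- Illustrative usage sketch (not part of the original repository) ---------
# For images larger than the 512x512 network input, dimension_preprocess()
# pads and slices the image into overlapping 512x512 chunks and
# dimension_postprocess() stitches per-chunk predictions back into the
# original resolution, averaging the overlapping regions. Round-tripping a
# dummy image (no model involved) shows the bookkeeping:
if __name__ == "__main__":
    chunker = ImageChunker(512, 512, overlap=30)
    big = np.random.rand(700, 900, 3)
    chunks = chunker.dimension_preprocess(big)
    print(chunks.shape)                      # (4, 512, 512, 3) for a 700x900 input
    rebuilt = chunker.dimension_postprocess(chunks, big)
    print(rebuilt.shape)                     # (700, 900, 3)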
import os
import gc
import datetime
import numpy as np
import pandas as pd
import cv2
from argparse import ArgumentParser
from copy import deepcopy
from tqdm import tqdm
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import TensorBoard, ModelCheckpoint, LambdaCallback
from keras import backend as K
from keras.utils import Sequence
from keras_tqdm import TQDMCallback
import matplotlib.pyplot as plt
from matplotlib.ticker import NullFormatter
from libs.pconv_model import PConvUnet
from libs.util import MaskGenerator
# Sample call
r"""
# Train on CelebaHQ
python main.py --name CelebHQ --train C:\Documents\Kaggle\celebaHQ-512\train\ --validation C:\Documents\Kaggle\celebaHQ-512\val\ --test C:\Documents\Kaggle\celebaHQ-512\test\ --checkpoint "C:\Users\Mathias Felix Gruber\Documents\GitHub\PConv-Keras\data\logs\imagenet_phase1_paperMasks\weights.35-0.70.h5"
"""
def parse_args():
parser = ArgumentParser(description="Training script for PConv inpainting")
parser.add_argument(
"-stage",
"--stage",
type=str,
default="train",
help="Which stage of training to run",
choices=["train", "finetune"],
)
parser.add_argument(
"-train", "--train", type=str, help="Folder with training images"
)
parser.add_argument(
"-validation", "--validation", type=str, help="Folder with validation images"
)
parser.add_argument("-test", "--test", type=str, help="Folder with testing images")
parser.add_argument(
"-name",
"--name",
type=str,
default="myDataset",
help="Dataset name, e.g. 'imagenet'",
)
parser.add_argument(
"-batch_size",
"--batch_size",
type=int,
default=4,
help="What batch-size should we use",
)
parser.add_argument(
"-test_path",
"--test_path",
type=str,
default="./data/test_samples/",
help="Where to output test images during training",
)
parser.add_argument(
"-weight_path",
"--weight_path",
type=str,
default="./data/logs/",
help="Where to output weights during training",
)
parser.add_argument(
"-log_path",
"--log_path",
type=str,
default="./data/logs/",
help="Where to output tensorboard logs during training",
)
parser.add_argument(
"-vgg_path",
"--vgg_path",
type=str,
default="./data/logs/pytorch_to_keras_vgg16.h5",
help="VGG16 weights trained on PyTorch with pixel scaling 1/255.",
)
parser.add_argument(
"-checkpoint",
"--checkpoint",
type=str,
help="Previous weights to be loaded onto model",
)
return parser.parse_args()
class AugmentingDataGenerator(ImageDataGenerator):
"""Wrapper for ImageDataGenerator to return mask & image"""
def flow_from_directory(self, directory, mask_generator, *args, **kwargs):
generator = super().flow_from_directory(
directory, class_mode=None, *args, **kwargs
)
seed = None if "seed" not in kwargs else kwargs["seed"]
while True:
# Get augmented image samples
ori = next(generator)
# Get masks for each image sample
mask = np.stack(
[mask_generator.sample(seed) for _ in range(ori.shape[0])], axis=0
)
# Apply masks to all image samples
masked = deepcopy(ori)
masked[mask == 0] = 1
# Yield ([masked, mask], ori) training batches
# print(masked.shape, ori.shape)
gc.collect()
yield [masked, mask], ori
# Run script
if __name__ == "__main__":
# Parse command-line arguments
args = parse_args()
if args.stage == "finetune" and not args.checkpoint:
raise AttributeError(
"If you are finetuning your model, you must supply a checkpoint file"
)
# Create training generator
train_datagen = AugmentingDataGenerator(
rotation_range=10,
width_shift_range=0.1,
height_shift_range=0.1,
rescale=1.0 / 255,
horizontal_flip=True,
)
train_generator = train_datagen.flow_from_directory(
args.train,
MaskGenerator(512, 512, 3),
target_size=(512, 512),
batch_size=args.batch_size,
)
# Create validation generator
val_datagen = AugmentingDataGenerator(rescale=1.0 / 255)
val_generator = val_datagen.flow_from_directory(
args.validation,
MaskGenerator(512, 512, 3),
target_size=(512, 512),
batch_size=args.batch_size,
classes=["val"],
seed=42,
)
# Create testing generator
test_datagen = AugmentingDataGenerator(rescale=1.0 / 255)
test_generator = test_datagen.flow_from_directory(
args.test,
MaskGenerator(512, 512, 3),
target_size=(512, 512),
batch_size=args.batch_size,
seed=42,
)
# Pick out an example to be sent to the test samples folder
test_data = next(test_generator)
(masked, mask), ori = test_data
def plot_callback(model, path):
"""Called at the end of each epoch, displaying our previous test images,
as well as their masked predictions and saving them to disk"""
# Get samples & Display them
pred_img = model.predict([masked, mask])
pred_time = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
# Clear current output and display test images
for i in range(len(ori)):
_, axes = plt.subplots(1, 3, figsize=(20, 5))
axes[0].imshow(masked[i, :, :, :])
axes[1].imshow(pred_img[i, :, :, :] * 1.0)
axes[2].imshow(ori[i, :, :, :])
axes[0].set_title("Masked Image")
axes[1].set_title("Predicted Image")
axes[2].set_title("Original Image")
plt.savefig(os.path.join(path, "img_{}_{}.png".format(i, pred_time)))
plt.close()
# Load the model
if args.vgg_path:
model = PConvUnet(vgg_weights=args.vgg_path)
else:
model = PConvUnet()
# Loading of checkpoint
if args.checkpoint:
if args.stage == "train":
model.load(args.checkpoint)
elif args.stage == "finetune":
model.load(args.checkpoint, train_bn=False, lr=0.00005)
# Fit model
model.fit_generator(
train_generator,
steps_per_epoch=10000,
validation_data=val_generator,
validation_steps=1000,
epochs=100,
verbose=0,
callbacks=[
TensorBoard(
log_dir=os.path.join(args.log_path, args.name + "_phase1"),
write_graph=False,
),
ModelCheckpoint(
os.path.join(
args.log_path,
args.name + "_phase1",
"weights.{epoch:02d}-{loss:.2f}.h5",
),
monitor="val_loss",
save_best_only=True,
save_weights_only=True,
),
LambdaCallback(
on_epoch_end=lambda epoch, logs: plot_callback(model, args.test_path)
),
TQDMCallback(),
],
)
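# --- Illustrative note (not part of the original repository) -----------------
# The "finetune" stage reloads a phase-1 checkpoint with the batch-norm layers
# frozen and a lower learning rate (model.load(..., train_bn=False, lr=0.00005)
# above). A phase-2 invocation could look like the following; the paths and
# checkpoint name are placeholders consistent with the ModelCheckpoint pattern
# used in this script:
#
#   python main.py --name CelebHQ --stage finetune \
#       --train ./data/train --validation ./data/val --test ./data/test \
#       --checkpoint ./data/logs/CelebHQ_phase1/weights.35-0.70.h5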
h5py==2.8.0
Keras==2.2.4
Keras-Applications==1.0.6
Keras-Preprocessing==1.0.5
keras-tqdm==2.0.1
matplotlib==3.0.2
numpy==1.15.4
pandas==0.23.4
scipy==1.1.0
seaborn==0.9.0
tables==3.4.4
tensorboard==1.12.2
tensorflow==1.12.0
tqdm==4.28.1