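# ----------------------------------------------------------------------
# Detection script: runs tiny-YOLOv3 plus a ROIPool distance head on a camera
# feed and sends single-byte approach/steering commands over a serial port.
# ----------------------------------------------------------------------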
from __future__ import division
from roipool2 import *
from models import *
from utils.utils import *
from utils.datasets import *
from video_capture import BufferlessVideoCapture
import serial
import os
import sys
import time
import datetime
import argparse
import cv2
from PIL import Image
import torch
from torch.utils.data import DataLoader
from torchvision import datasets
from torch.autograd import Variable
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.ticker import NullLocator
def changeRGB2BGR(img):
r = img[:, :, 0].copy()
g = img[:, :, 1].copy()
b = img[:, :, 2].copy()
# RGB > BGR
img[:, :, 0] = b
img[:, :, 1] = g
img[:, :, 2] = r
return img
def changeBGR2RGB(img):
b = img[:, :, 0].copy()
g = img[:, :, 1].copy()
r = img[:, :, 2].copy()
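# BGR > RGB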
img[:, :, 0] = r
img[:, :, 1] = g
img[:, :, 2] = b
return img
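# Note: cv2.cvtColor(img, cv2.COLOR_BGR2RGB) performs the same conversion without modifying the array in place.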
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--image_folder", type=str, default="data/cafe_distance/1.jpg", help="path to dataset")
parser.add_argument("--video_file", type=str, default="0", help="path to dataset")
parser.add_argument("--model_def", type=str, default="config/yolov3-tiny.cfg", help="path to model definition file")
# parser.add_argument("--weights_path", type=str, default="weights/yolov3-tiny.weights", help="path to weights file")
parser.add_argument("--weights_path", type=str, default="checkpoints_yolo/tiny1_2500.pth", help="path to weights file")
parser.add_argument("--class_path", type=str, default="data/cafe_distance/classes.names", help="path to class label file")
parser.add_argument("--conf_thres", type=float, default=0.8, help="object confidence threshold")
parser.add_argument("--nms_thres", type=float, default=0.4, help="iou thresshold for non-maximum suppression")
parser.add_argument("--batch_size", type=int, default=1, help="size of the batches")
parser.add_argument("--n_cpu", type=int, default=0, help="number of cpu threads to use during batch generation")
parser.add_argument("--img_size", type=int, default=416, help="size of each image dimension")
parser.add_argument("--checkpoint_model", type=str, help="path to checkpoint model")
parser.add_argument("--target_object", type=int, default=0)
opt = parser.parse_args()
print(opt)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
os.makedirs("output", exist_ok=True)
sclient = serial.Serial(port='/dev/ttyAMA0', baudrate=115200, timeout=0.1)
if sclient.isOpen():
print('Serial is Open')
# Set up model
model = Darknet(opt.model_def, img_size=opt.img_size).to(device)
model_parameters = filter(lambda p: p.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print('Params: ', params)
if opt.weights_path.endswith(".weights"):
# Load darknet weights
model.load_darknet_weights(opt.weights_path)
else:
# Load checkpoint weights
model.load_state_dict(torch.load(opt.weights_path, map_location=device))
model.eval() # Set in evaluation mode
model_distance = ROIPool((3, 3)).to(device)
model_distance.load_state_dict(torch.load('checkpoints_distance/tiny1_340.pth', map_location=device))
model_distance.eval()
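# The ROIPool head consumes the YOLO feature map together with the detected boxes and regresses a distance value for each detection.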
dataloader = DataLoader(
ImageFolder(opt.image_folder, img_size=opt.img_size),
batch_size=opt.batch_size,
shuffle=False,
num_workers=opt.n_cpu,
)
classes = load_classes(opt.class_path) # Extracts class labels from file
Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor
cap = BufferlessVideoCapture(0)
# cap = cv2.VideoCapture('data/cafe_distance/videos/output17.avi')
colors = np.random.randint(0, 255, size=(len(classes), 3), dtype="uint8")
a=[]
time_begin = time.time()
NUM = cap.get(cv2.CAP_PROP_FRAME_COUNT) # frame count; only meaningful for a file source (a live camera typically reports 0 or -1 here)
fourcc = cv2.VideoWriter_fourcc('D', 'I', 'V', 'X')
out = cv2.VideoWriter('output/distance3.avi', fourcc, 30, (640,480))
mode = 0
while cap.isOpened():
ret, img = cap.read()
if not ret:
break
# img = cv2.resize(img, (1280, 960), interpolation=cv2.INTER_CUBIC)
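# Preprocessing: convert BGR to RGB, pad to a square, resize to the 416x416 network input, and add a batch dimension.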
RGBimg=changeBGR2RGB(img)
imgTensor = transforms.ToTensor()(RGBimg)
imgTensor, _ = pad_to_square(imgTensor, 0)
imgTensor = resize(imgTensor, 416)
imgTensor = imgTensor.unsqueeze(0)
imgTensor = Variable(imgTensor.type(Tensor))
with torch.no_grad():
# prev_time = time.time()
featuremap, detections = model(imgTensor)
# print(featuremap)
# current_time = time.time()
# sec = current_time - prev_time
# fps = 1/sec
# frame_per_sec = "FPS: %0.1f" % fps
# print(frame_per_sec)
detections = non_max_suppression(detections, opt.conf_thres, opt.nms_thres)
# print(f'none test = {detections}')
a.clear()
if detections is not None and detections[0] is not None:
# print(detections)
featuremap = Variable(featuremap.to(device))
detects = Variable(detections[0], requires_grad=False)
# print(f'detects = {detects}')
# print(f'featuremap = {featuremap.shape}')
outputs = model_distance(featuremap, detects)
print(f'distance = {outputs}')
a.extend(detections)
if len(a):
for detections in a:
if detections is not None:
# print(detections)
detections = rescale_boxes(detections, opt.img_size, RGBimg.shape[:2])
# print(detections)
unique_labels = detections[:, -1].cpu().unique()
n_cls_preds = len(unique_labels)
for i, (x1, y1, x2, y2, conf, cls_conf, cls_pred) in enumerate(detections):
if int(cls_pred) == opt.target_object:  # opt.target_object is a class index (int), so compare indices rather than label strings
target_distance = float(outputs[i])
if mode == 0:
# Approach phase: send command 0x01 while the predicted distance is above the threshold,
# then switch to tracking mode once the target is close enough.
if target_distance > 8:
sclient.write(serial.to_bytes([0x01]))
break
else:
mode = 1
break
elif mode == 1:
# Tracking phase: steer according to the horizontal position of the box center:
# 0x02 if it lies left of the 300-340 px band, 0x03 if right of it, 0x04 if centered.
box_w = x2 - x1
target_location = int(x1 + box_w / 2)
if target_location < 300:
sclient.write(serial.to_bytes([0x02]))
break
elif target_location > 340:
sclient.write(serial.to_bytes([0x03]))
break
else:
sclient.write(serial.to_bytes([0x04]))
break
#box_w = x2 - x1
# print(box_w)
#box_h = y2 - y1
# print(y2, y1)
# color = [int(c) for c in colors[int(cls_pred)]]
#print(cls_conf)
# img = cv2.rectangle(img, (x1, y1 + box_h), (x2, y1), color, 2)
# cv2.putText(img, classes[int(cls_pred)], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
# cv2.putText(img, str("%.2f" % float(outputs[i])), (x2, y2 - box_h), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
# color, 2)
# print(classes[int(cls_pred)], int(x1+box_w/2), int(480-(y1+box_h/2)))
#print()
#print()
#cv2.putText(img,"Hello World!",(400,50),cv2.FONT_HERSHEY_PLAIN,2.0,(0,0,255),2)
# cv2.imshow('frame', changeRGB2BGR(RGBimg))
# out.write(changeRGB2BGR(RGBimg))
#cv2.waitKey(0)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
time_end = time.time()
time_total = time_end - time_begin
print(NUM // time_total)  # average frames per second over the run (only meaningful when NUM is a real frame count)
sclient.close()
cap.release()
out.release()
cv2.destroyAllWindows()
'''
capture = cv2.VideoCapture("data/cafe/9.mp4")
capture.set(cv2.CAP_PROP_FRAME_WIDTH, 416)
capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 416)
capture.set(cv2.CAP_PROP_FPS, 3)
colors = np.random.randint(0, 255, size=(len(classes), 3), dtype="uint8")
capture.set(5, 5)
print(capture.get(cv2.CAP_PROP_FRAME_WIDTH), capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
print("FPS: ", capture.get(5))
startTime = time.time()
a=[]
while capture.isOpened():
ret, frame = capture.read()
# print()
nowTime = time.time()
PILimg = np.array(Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))
# RGBimg = changeBGR2RGB(frame)
imgTensor = transforms.ToTensor()(PILimg)
imgTensor, _ = pad_to_square(imgTensor, 0)
imgTensor = resize(imgTensor, 416)
imgTensor = imgTensor.unsqueeze(0)
imgTensor = Variable(imgTensor.type(Tensor))
with torch.no_grad():
prev_time = time.time()
detections = model(imgTensor)
current_time = time.time()
sec = current_time - prev_time
fps = 1/sec
frame_per_sec = "FPS: %0.1f" % fps
# inference_time = datetime.timedelta(seconds=current_time - prev_time)
prev_time = current_time
red = (0, 0, 255)
cv2.putText(frame, frame_per_sec, (25, 25), cv2.FONT_HERSHEY_SIMPLEX, 0.5, red, 2)
detections = non_max_suppression(detections, opt.conf_thres, opt.nms_thres)
a.clear()
if detections is not None:
a.extend(detections)
b=len(a)
if len(a):
for detections in a:
if detections is not None:
detections = rescale_boxes(detections, opt.img_size, PILimg.shape[:2])
unique_labels = detections[:, -1].cpu().unique()
n_cls_preds = len(unique_labels)
for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
if classes[int(cls_pred)] == 'shrimp cracker':
box_w = x2 - x1
box_h = y2 - y1
color = [int(c) for c in colors[int(cls_pred)]]
# print(cls_conf)
frame = cv2.rectangle(frame, (x1, y1 + box_h), (x2, y1), color, 2)
cv2.putText(frame, classes[int(cls_pred)], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
cv2.putText(frame, str("%.2f" % float(conf)), (x2, y2 - box_h), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
color, 2)
print(classes[int(cls_pred)], int(x1+box_w/2), int(224-(y1+box_h/2)))
print()
#cv2.putText(img,"Hello World!",(400,50),cv2.FONT_HERSHEY_PLAIN,2.0,(0,0,255),2)
#cv2.namedWindow('frame', cv2.WINDOW_NORMAL)
cv2.imshow('frame', frame)
#cv2.waitKey(0)
if cv2.waitKey(25) & 0xFF == ord('q'):
break
capture.release()
cv2.destroyAllWindows()
'''
'''
imgs = [] # Stores image paths
img_detections = [] # Stores detections for each image index
print('parameter count: ', count_parameters(model))
print("\nPerforming object detection:")
prev_time = time.time()
for batch_i, (img_paths, input_imgs) in enumerate(dataloader):
# Configure input
input_imgs = Variable(input_imgs.type(Tensor))
# Get detections
with torch.no_grad():
detections = model(input_imgs)
detections = non_max_suppression(detections, opt.conf_thres, opt.nms_thres)
# Log progress
current_time = time.time()
inference_time = datetime.timedelta(seconds=current_time - prev_time)
prev_time = current_time
print("\t+ Batch %d, Inference Time: %s" % (batch_i, inference_time))
# Save image and detections
imgs.extend(img_paths)
img_detections.extend(detections)
# Bounding-box colors
cmap = plt.get_cmap("tab20b")
colors = [cmap(i) for i in np.linspace(0, 1, 20)]
print("\nSaving images:")
# Iterate through images and save plot of detections
for img_i, (path, detections) in enumerate(zip(imgs, img_detections)):
print("(%d) Image: '%s'" % (img_i, path))
# Create plot
img = np.array(Image.open(path))
plt.figure()
fig, ax = plt.subplots(1)
ax.imshow(img)
# Draw bounding boxes and labels of detections
if detections is not None:
# Rescale boxes to original image
detections = rescale_boxes(detections, opt.img_size, img.shape[:2])
unique_labels = detections[:, -1].cpu().unique()
n_cls_preds = len(unique_labels)
bbox_colors = random.sample(colors, n_cls_preds)
for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
print("\t+ Label: %s, Conf: %.5f" % (classes[int(cls_pred)], cls_conf.item()))
box_w = x2 - x1
box_h = y2 - y1
color = bbox_colors[int(np.where(unique_labels == int(cls_pred))[0])]
# Create a Rectangle patch
bbox = patches.Rectangle((x1, y1), box_w, box_h, linewidth=2, edgecolor=color, facecolor="none")
# Add the bbox to the plot
ax.add_patch(bbox)
# Add label
plt.text(
x1,
y1,
s=str(classes[int(cls_pred)])+' '+str(int(x1+box_w/2))+ ', '+str(int(y1+box_h/2)),
color="white",
verticalalignment="top",
bbox={"color": color, "pad": 0},
)
# Save generated image with detections
plt.axis("off")
plt.gca().xaxis.set_major_locator(NullLocator())
plt.gca().yaxis.set_major_locator(NullLocator())
filename = path.split("/")[-1].split("\\")[-1].split(".")[0]
plt.savefig(f"output/{filename}.png", bbox_inches="tight", pad_inches=0.0)
plt.close()
'''
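# ----------------------------------------------------------------------
# Training script for the distance (ROIPool) head (a separate file in the repo):
# the YOLO detector is frozen and only the distance head is optimized.
# ----------------------------------------------------------------------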
from __future__ import division
from roipool2 import *
from models import *
from utils.utils import *
from utils.datasets import *
from utils.parse_config import *
# from test import evaluate
from terminaltables import AsciiTable
import os
import sys
import time
import datetime
import argparse
import warnings
import torch
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
from torch.autograd import Variable
import torch.optim as optim
warnings.filterwarnings("ignore", category=UserWarning)
if __name__ == '__main__':
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print('device: ', device)
data_config = parse_data_config('config/cafe_distance.data')
train_path = data_config["train"]
valid_path = data_config["valid"]
class_names = load_classes(data_config["names"])
model = Darknet('config/yolov3-tiny.cfg', 416).to(device)
model.load_state_dict(torch.load('checkpoints_cafe_distance/tiny1_2500.pth', map_location=device))
model.eval()
dataset = ListDataset(train_path, augment=True, multiscale=True)
dataloader = torch.utils.data.DataLoader(
dataset,
batch_size=1,
shuffle=True,
num_workers=4,
pin_memory=True,
collate_fn=dataset.collate_fn,
)
model_distance = ROIPool((3, 3)).to(device)
model_parameters = filter(lambda p: p.requires_grad, model_distance.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print('Params: ', params)
optimizer = torch.optim.Adam(model_distance.parameters())
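# Only the distance head's parameters are optimized; the detector below runs under torch.no_grad() as a frozen feature extractor.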
a = []
for epoch in range(2000):
warnings.filterwarnings('ignore', category=UserWarning)
for batch_i, (img_path, imgs, targets, targets_distance) in enumerate(dataloader):
imgs = Variable(imgs.to(device))
with torch.no_grad():
featuremap, detections = model(imgs)
# print(featuremap.shape)
featuremap = Variable(featuremap.to(device))
detections = non_max_suppression(detections, 0.8, 0.4)
targets_distance = torch.tensor(targets_distance[0])
targets_distance = Variable(targets_distance, requires_grad=True)
if detections is not None and detections[0] is not None:
detections[0] = Variable(detections[0], requires_grad=True)
loss, outputs = model_distance(featuremap, detections[0], targets=targets_distance)
# loss = torch.tensor([loss]).to(device)
# loss.requires_grad = True
# print(model_distance.fc1.bias)
optimizer.zero_grad()
loss.backward()
optimizer.step()
# print(model_distance.fc1.bias)
# print(batch_i)
print(epoch)
# print(featuremap)
if epoch % 10 == 0:
# Halve the learning rate and save a checkpoint every 10 epochs.
optimizer.param_groups[0]['lr'] /= 2
torch.save(model_distance.state_dict(), f'checkpoints_distance11/tiny1_{epoch}.pth')
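# ----------------------------------------------------------------------
# Training script for the tiny-YOLOv3 detector itself (a separate file in the repo).
# ----------------------------------------------------------------------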
from __future__ import division
from models import *
from roipool import *
# from utils.logger import *
from utils.utils import *
from utils.datasets import *
from utils.parse_config import *
# from test import evaluate
from terminaltables import AsciiTable
import os
import sys
import time
import datetime
import argparse
import warnings
import torch
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
from torch.autograd import Variable
import torch.optim as optim
warnings.filterwarnings("ignore", category=UserWarning)
if __name__ == "__main__":
warnings.filterwarnings("ignore", category=UserWarning)
parser = argparse.ArgumentParser()
parser.add_argument("--epochs", type=int, default=8001, help="number of epochs")
parser.add_argument("--batch_size", type=int, default=1, help="size of each image batch")
parser.add_argument("--gradient_accumulations", type=int, default=2, help="number of gradient accums before step")
parser.add_argument("--model_def", type=str, default="config/yolov3-tiny.cfg", help="path to model definition file")
parser.add_argument("--data_config", type=str, default="config/testdata.data", help="path to data config file")
parser.add_argument("--pretrained_weights", type=str, help="if specified starts from checkpoint model")
parser.add_argument("--n_cpu", type=int, default=4, help="number of cpu threads to use during batch generation")
parser.add_argument("--img_size", type=int, default=416, help="size of each image dimension")
parser.add_argument("--checkpoint_interval", type=int, default=50, help="interval between saving model weights")
parser.add_argument("--evaluation_interval", type=int, default=10000, help="interval evaluations on validation set")
parser.add_argument("--compute_map", default=False, help="if True computes mAP every tenth batch")
parser.add_argument("--multiscale_training", default=True, help="allow for multi-scale training")
opt = parser.parse_args()
print(opt)
# logger = Logger("logs")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print('device: ', device)
os.makedirs("output", exist_ok=True)
os.makedirs("checkpoints", exist_ok=True)
# Get data configuration
data_config = parse_data_config(opt.data_config)
train_path = data_config["train"]
valid_path = data_config["valid"]
class_names = load_classes(data_config["names"])
# Initiate model
model = Darknet(opt.model_def).to(device)
model.apply(weights_init_normal)
model_distance = ROIPool((7, 7)).to(device)
# If specified we start from checkpoint
if opt.pretrained_weights:
if opt.pretrained_weights.endswith(".pth"):
model.load_state_dict(torch.load(opt.pretrained_weights))
else:
model.load_darknet_weights(opt.pretrained_weights)
model_parameters = filter(lambda p: p.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print('Params: ', params)
# Get dataloader
dataset = ListDataset(train_path, augment=True, multiscale=opt.multiscale_training)
dataloader = torch.utils.data.DataLoader(
dataset,
batch_size=opt.batch_size,
shuffle=False,
num_workers=opt.n_cpu,
pin_memory=True,
collate_fn=dataset.collate_fn,
)
optimizer = torch.optim.Adam(model.parameters())
metrics = [
"grid_size",
"loss",
"x",
"y",
"w",
"h",
"conf",
"cls",
"cls_acc",
"recall50",
"recall75",
"precision",
"conf_obj",
"conf_noobj",
]
for epoch in range(opt.epochs):
model.train()
warnings.filterwarnings('ignore', category=UserWarning)
start_time = time.time()
for batch_i, (_, imgs, targets) in enumerate(dataloader):
batches_done = len(dataloader) * epoch + batch_i
imgs = Variable(imgs.to(device))
targets = Variable(targets.to(device), requires_grad=False)
loss, outputs = model(imgs, targets)
print(f'targets = {targets}')
loss.backward()
if batches_done % opt.gradient_accumulations:
# Accumulates gradient before each step
optimizer.step()
optimizer.zero_grad()
# ----------------
# Log progress
# ----------------
log_str = "\n---- [Epoch %d/%d, Batch %d/%d] ----\n" % (epoch, opt.epochs, batch_i, len(dataloader))
metric_table = [["Metrics", *[f"YOLO Layer {i}" for i in range(len(model.yolo_layers))]]]
# Log metrics at each YOLO layer
for i, metric in enumerate(metrics):
formats = {m: "%.6f" for m in metrics}
formats["grid_size"] = "%2d"
formats["cls_acc"] = "%.2f%%"
row_metrics = [formats[metric] % yolo.metrics.get(metric, 0) for yolo in model.yolo_layers]
metric_table += [[metric, *row_metrics]]
# Tensorboard logging
tensorboard_log = []
for j, yolo in enumerate(model.yolo_layers):
for name, metric in yolo.metrics.items():
if name != "grid_size":
tensorboard_log += [(f"{name}_{j+1}", metric)]
tensorboard_log += [("loss", loss.item())]
# logger.list_of_scalars_summary(tensorboard_log, batches_done)
log_str += AsciiTable(metric_table).table
log_str += f"\nTotal loss {loss.item()}"
# Determine approximate time left for epoch
epoch_batches_left = len(dataloader) - (batch_i + 1)
time_left = datetime.timedelta(seconds=epoch_batches_left * (time.time() - start_time) / (batch_i + 1))
log_str += f"\n---- ETA {time_left}"
print(log_str)
model.seen += imgs.size(0)
if epoch % opt.evaluation_interval == 0 and epoch != 0:
print("\n---- Evaluating Model ----")
# Evaluate the model on the validation set (note: requires the "from test import evaluate" import that is commented out at the top of this script)
precision, recall, AP, f1, ap_class = evaluate(
model,
path=valid_path,
iou_thres=0.5,
conf_thres=0.5,
nms_thres=0.5,
img_size=opt.img_size,
batch_size=1,
)
evaluation_metrics = [
("val_precision", precision.mean()),
("val_recall", recall.mean()),
("val_mAP", AP.mean()),
("val_f1", f1.mean()),
]
# logger.list_of_scalars_summary(evaluation_metrics, epoch)
# Print class APs and mAP
ap_table = [["Index", "Class name", "AP"]]
for i, c in enumerate(ap_class):
ap_table += [[c, class_names[c], "%.5f" % AP[i]]]
print(AsciiTable(ap_table).table)
print(f"---- mAP {AP.mean()}")
if epoch % opt.checkpoint_interval == 0:
torch.save(model.state_dict(), f"checkpoints_fire/tiny1_{epoch}.pth")  # note: this directory is not created by the os.makedirs calls above
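# ----------------------------------------------------------------------
# video_capture.py: buffer-less wrapper around cv2.VideoCapture used by the detection script above.
# ----------------------------------------------------------------------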
import cv2
import queue
import threading
class BufferlessVideoCapture:
'''
BufferlessVideoCapture is a wrapper around cv2.VideoCapture that keeps no
frame backlog: a reader thread drains the device continuously and only the
most recent frame is retained, so read() always returns a fresh frame.
@param name: argument forwarded to cv2.VideoCapture (device index or file path)
'''
def __init__(self, name):
self.cap = cv2.VideoCapture(name)
self.q = queue.Queue()
self.thr = threading.Thread(target=self._reader)
self.thr.daemon = True
self.thr.start()
def _reader(self):
'''
Main loop for thread.
'''
while True:
ret, frame = self.cap.read()
if not ret:
break
if not self.q.empty():
try:
self.q.get_nowait() # discard previous (unprocessed) frame
except queue.Empty:
pass
if self.q.qsize() > 2:
print(self.q.qsize())
self.q.put(frame)
def isOpened(self):
return self.cap.isOpened()
def get(self, prop):
# Pass property queries (e.g. cv2.CAP_PROP_FRAME_COUNT, used by the detection script) through to the wrapped capture.
return self.cap.get(prop)
def release(self):
self.cap.release()
def read(self):
'''
Return the most recent frame, blocking until the reader thread has queued one.
The boolean is always True, mirroring the cv2.VideoCapture.read() interface.
'''
return True, self.q.get()
def close(self):
pass
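# Minimal usage sketch, assuming a camera is available at index 0: frames are
# shown until "q" is pressed; read() blocks until the reader thread queues a frame.
if __name__ == "__main__":
    cap = BufferlessVideoCapture(0)
    while cap.isOpened():
        ok, frame = cap.read()
        if not ok:
            break
        cv2.imshow("preview", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
    cap.release()
    cv2.destroyAllWindows()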