김연수

delete RetinaNet

Showing 86 changed files with 0 additions and 4864 deletions
# see https://github.com/codecov/support/wiki/Codecov-Yaml
codecov:
  notify:
    require_ci_to_pass: yes

coverage:
  precision: 0  # 2 = xx.xx%, 0 = xx%
  round: nearest  # how coverage is rounded: down/up/nearest
  range: 40...100  # custom range of coverage colors from red -> yellow -> green

  status:
    # https://codecov.readme.io/v1.0/docs/commit-status
    project:
      default:
        against: auto
        target: 90%  # specify the target coverage for each commit status
        threshold: 20%  # allow this little decrease on project
        # https://github.com/codecov/support/wiki/Filtering-Branches
        # branches: master
        if_ci_failed: error
    # https://github.com/codecov/support/wiki/Patch-Status
    patch:
      default:
        against: auto
        target: 40%  # specify the target "X%" coverage to hit
        # threshold: 50%  # allow this much decrease on patch
    changes: false

parsers:
  gcov:
    branch_detection:
      conditional: true
      loop: true
      macro: false
      method: false
  javascript:
    enable_partials: false

comment:
  layout: header, diff
  require_changes: false
  behavior: default  # update if exists else create new
  # branches: *
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# Distribution / packaging
.Python
/build/
/dist/
/eggs/
/*-eggs/
.eggs/
/sdist/
/wheels/
/*.egg-info/
.installed.cfg
*.egg
# Unit test / coverage reports
.coverage
.coverage.*
coverage.xml
*.cover
[submodule "tests/test-data"]
    path = tests/test-data
    url = https://github.com/fizyr/keras-retinanet-test-data.git
language: python
sudo: required
python:
  - '3.6'
  - '3.7'
install:
  - pip install -r requirements.txt
  - pip install -r tests/requirements.txt
cache: pip
script:
  - python setup.py check -m -s
  - python setup.py build_ext --inplace
  - coverage run --source keras_retinanet -m py.test keras_retinanet tests --doctest-modules --forked --flake8
after_success:
  - coverage xml
  - coverage report
  - codecov
# Contributors
This is a list of people who contributed patches to keras-retinanet.
If you feel you should be listed here, or if you have any other questions or comments about your listing,
please create an issue or pull request at https://github.com/fizyr/keras-retinanet/.
* Hans Gaiser <h.gaiser@fizyr.com>
* Maarten de Vries <maarten@de-vri.es>
* Valerio Carpani
* Ashley Williamson
* Yann Henon
* Valeriu Lacatusu
* András Vidosits
* Cristian Gratie
* jjiunlin
* Sorin Panduru
* Rodrigo Meira de Andrade
* Enrico Liscio <e.liscio@fizyr.com>
* Mihai Morariu
* pedroconceicao
* jjiun
* Wudi Fang
* Mike Clark
* hannesedvartsen
* Max Van Sande
* Pierre Dérian
* ori
* mxvs
* mwilder
* Muhammed Kocabas
* Koen Vijverberg
* iver56
* hnsywangxin
* Guillaume Erhard
* Eduardo Ramos
* DiegoAgher
* Alexander Pacha
* Agastya Kalra
* Jiri BOROVEC
* ntsagko
* charlie / tianqi
* jsemric
* Martin Zlocha
* Raghav Bhardwaj
* bw4sz
* Morten Back Nielsen
* dshahrokhian
* Alex / adreo00
* simone.merello
* Matt Wilder
* Jinwoo Baek
* Etienne Meunier
* Denis Dowling
* cclauss
* Andrew Grigorev
* ZFTurbo
* UgoLouche
* Richard Higgins
* Rajat / rajat.goel
* philipp.marquardt
* peacherwu
* Paul / pauldesigaud
* Martin Genet
* Leo / leonardvandriel
* Laurens Hagendoorn
* Julius / juliussimonelli
* HolyGuacamole
* Fausto Morales
* borakrc
* Ben Weinstein
* Anil Karaka
* Andrea Panizza
* Bruno Santos
[anchor_parameters]
# Sizes should correlate to how the network processes an image; it is not advised to change these!
sizes = 64 128 256
# Strides should correlate to how the network strides over an image; it is not advised to change these!
strides = 16 32 64
# The different ratios to use per anchor location.
ratios = 0.5 1 2 3
# The different scaling factors to use per anchor location.
scales = 1 1.2 1.6
[pyramid_levels]
levels = 3 4 5
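For reference, this is how such a file can be read back with Python's standard configparser; a minimal sketch only, not the package's actual read_config_file / parse_anchor_parameters implementation (the file name is a placeholder).

```python
# Minimal sketch using the standard library; not the package's actual
# config-parsing code. "config.ini" is a placeholder path.
import configparser

config = configparser.ConfigParser()
config.read("config.ini")

# The values are space-separated numbers, as in the sections above.
sizes   = [int(x) for x in config["anchor_parameters"]["sizes"].split()]
strides = [int(x) for x in config["anchor_parameters"]["strides"].split()]
ratios  = [float(x) for x in config["anchor_parameters"]["ratios"].split()]
scales  = [float(x) for x in config["anchor_parameters"]["scales"].split()]
levels  = [int(x) for x in config["pyramid_levels"]["levels"].split()]
print(sizes, strides, ratios, scales, levels)
```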
[.ShellClassInfo]
IconResource=C:\WINDOWS\System32\SHELL32.dll,3
[ViewState]
Mode=
Vid=
FolderType=Generic
#!/usr/bin/env python
# coding: utf-8
# Load necessary modules
import sys
sys.path.insert(0, "../")
# import keras_retinanet
from keras_retinanet import models
from keras_retinanet.utils.image import read_image_bgr, preprocess_image, resize_image
from keras_retinanet.utils.visualization import draw_box, draw_caption
from keras_retinanet.utils.colors import label_color
from keras_retinanet.utils.gpu import setup_gpu
# import miscellaneous modules
import matplotlib.pyplot as plt
import cv2
import os
import numpy as np
import time
# set tf backend to allow memory to grow, instead of claiming everything
import tensorflow as tf
# use this to change which GPU to use
gpu = 0
# set the modified tf session as backend in keras
setup_gpu(gpu)
# ## Load RetinaNet model
# In[ ]:
# adjust this to point to your downloaded/trained model
# models can be downloaded here: https://github.com/fizyr/keras-retinanet/releases
model_path = os.path.join("..", "snapshots", "resnet50_coco_best_v2.1.0.h5")
# load retinanet model
model = models.load_model(model_path, backbone_name="resnet50")
# if the model is not converted to an inference model, use the line below
# see: https://github.com/fizyr/keras-retinanet#converting-a-training-model-to-inference-model
# model = models.convert_model(model)
# print(model.summary())
# load label to names mapping for visualization purposes
labels_to_names = {
0: "person",
1: "bicycle",
2: "car",
3: "motorcycle",
4: "airplane",
5: "bus",
6: "train",
7: "truck",
8: "boat",
9: "traffic light",
10: "fire hydrant",
11: "stop sign",
12: "parking meter",
13: "bench",
14: "bird",
15: "cat",
16: "dog",
17: "horse",
18: "sheep",
19: "cow",
20: "elephant",
21: "bear",
22: "zebra",
23: "giraffe",
24: "backpack",
25: "umbrella",
26: "handbag",
27: "tie",
28: "suitcase",
29: "frisbee",
30: "skis",
31: "snowboard",
32: "sports ball",
33: "kite",
34: "baseball bat",
35: "baseball glove",
36: "skateboard",
37: "surfboard",
38: "tennis racket",
39: "bottle",
40: "wine glass",
41: "cup",
42: "fork",
43: "knife",
44: "spoon",
45: "bowl",
46: "banana",
47: "apple",
48: "sandwich",
49: "orange",
50: "broccoli",
51: "carrot",
52: "hot dog",
53: "pizza",
54: "donut",
55: "cake",
56: "chair",
57: "couch",
58: "potted plant",
59: "bed",
60: "dining table",
61: "toilet",
62: "tv",
63: "laptop",
64: "mouse",
65: "remote",
66: "keyboard",
67: "cell phone",
68: "microwave",
69: "oven",
70: "toaster",
71: "sink",
72: "refrigerator",
73: "book",
74: "clock",
75: "vase",
76: "scissors",
77: "teddy bear",
78: "hair drier",
79: "toothbrush",
}
# ## Run detection on example
# In[ ]:
# load image
image = read_image_bgr("000000008021.jpg")
# copy to draw on
draw = image.copy()
draw = cv2.cvtColor(draw, cv2.COLOR_BGR2RGB)
# preprocess image for network
image = preprocess_image(image)
image, scale = resize_image(image)
# process image
start = time.time()
boxes, scores, labels = model.predict_on_batch(np.expand_dims(image, axis=0))
print("processing time: ", time.time() - start)
# correct for image scale
boxes /= scale
# visualize detections
for box, score, label in zip(boxes[0], scores[0], labels[0]):
    # scores are sorted so we can break
    if score < 0.5:
        break
    color = label_color(label)
    b = box.astype(int)
    draw_box(draw, b, color=color)
    caption = "{} {:.3f}".format(labels_to_names[label], score)
    draw_caption(draw, b, caption)
plt.figure(figsize=(15, 15))
plt.axis("off")
plt.imshow(draw)
plt.show()
from .backend import * # noqa: F401,F403
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import tensorflow
from tensorflow import keras
def bbox_transform_inv(boxes, deltas, mean=None, std=None):
    """ Applies deltas (usually regression results) to boxes (usually anchors).

    Before applying the deltas to the boxes, the normalization that was previously applied (in the generator) has to be removed.
    The mean and std are the mean and std as applied in the generator; this function reverses that normalization and then applies the deltas to the boxes.

    Args
        boxes : np.array of shape (B, N, 4), where B is the batch size, N the number of boxes and 4 values for (x1, y1, x2, y2).
        deltas: np.array of same shape as boxes. These deltas (d_x1, d_y1, d_x2, d_y2) are a factor of the width/height.
        mean  : The mean value used when computing deltas (defaults to [0, 0, 0, 0]).
        std   : The standard deviation used when computing deltas (defaults to [0.2, 0.2, 0.2, 0.2]).

    Returns
        A np.array of the same shape as boxes, but with deltas applied to each box.
        The mean and std are used during training to normalize the regression values (networks love normalization).
    """
    if mean is None:
        mean = [0, 0, 0, 0]
    if std is None:
        std = [0.2, 0.2, 0.2, 0.2]

    width  = boxes[:, :, 2] - boxes[:, :, 0]
    height = boxes[:, :, 3] - boxes[:, :, 1]

    x1 = boxes[:, :, 0] + (deltas[:, :, 0] * std[0] + mean[0]) * width
    y1 = boxes[:, :, 1] + (deltas[:, :, 1] * std[1] + mean[1]) * height
    x2 = boxes[:, :, 2] + (deltas[:, :, 2] * std[2] + mean[2]) * width
    y2 = boxes[:, :, 3] + (deltas[:, :, 3] * std[3] + mean[3]) * height

    pred_boxes = keras.backend.stack([x1, y1, x2, y2], axis=2)

    return pred_boxes
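As a quick sanity check of the arithmetic above (a standalone NumPy re-derivation, not a call into the Keras backend): with the default std of 0.2, a unit delta on a 10-pixel-wide anchor moves each coordinate by 0.2 * 10 = 2 pixels.

```python
import numpy as np

# Standalone NumPy check of the delta math above (illustrative only).
boxes  = np.array([[[0.0, 0.0, 10.0, 10.0]]])  # (B=1, N=1, 4) anchor
deltas = np.array([[[1.0, 1.0, 1.0, 1.0]]])    # unit deltas
std    = [0.2, 0.2, 0.2, 0.2]                  # default normalization

width  = boxes[..., 2] - boxes[..., 0]         # 10
height = boxes[..., 3] - boxes[..., 1]         # 10
x1 = boxes[..., 0] + deltas[..., 0] * std[0] * width
y1 = boxes[..., 1] + deltas[..., 1] * std[1] * height
x2 = boxes[..., 2] + deltas[..., 2] * std[2] * width
y2 = boxes[..., 3] + deltas[..., 3] * std[3] * height
print(np.stack([x1, y1, x2, y2], axis=-1))     # [[[ 2.  2. 12. 12.]]]
```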
def shift(shape, stride, anchors):
    """ Produce shifted anchors based on shape of the map and stride size.

    Args
        shape  : Shape to shift the anchors over.
        stride : Stride to shift the anchors with over the shape.
        anchors: The anchors to apply at each location.
    """
    shift_x = (keras.backend.arange(0, shape[1], dtype=keras.backend.floatx()) + keras.backend.constant(0.5, dtype=keras.backend.floatx())) * stride
    shift_y = (keras.backend.arange(0, shape[0], dtype=keras.backend.floatx()) + keras.backend.constant(0.5, dtype=keras.backend.floatx())) * stride

    shift_x, shift_y = tensorflow.meshgrid(shift_x, shift_y)
    shift_x = keras.backend.reshape(shift_x, [-1])
    shift_y = keras.backend.reshape(shift_y, [-1])

    shifts = keras.backend.stack([
        shift_x,
        shift_y,
        shift_x,
        shift_y
    ], axis=0)

    shifts = keras.backend.transpose(shifts)
    number_of_anchors = keras.backend.shape(anchors)[0]

    k = keras.backend.shape(shifts)[0]  # number of base points = feat_h * feat_w

    shifted_anchors = keras.backend.reshape(anchors, [1, number_of_anchors, 4]) + keras.backend.cast(keras.backend.reshape(shifts, [k, 1, 4]), keras.backend.floatx())
    shifted_anchors = keras.backend.reshape(shifted_anchors, [k * number_of_anchors, 4])

    return shifted_anchors
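To make the bookkeeping concrete: a feature map of shape (H, W) with A base anchors yields H * W * A shifted anchors, one copy of each base anchor centred on every cell. A NumPy sketch of the same idea (illustrative, not the TF code above):

```python
import numpy as np

# NumPy sketch of the shifting logic above (illustrative only).
def shift_np(shape, stride, anchors):
    shift_x = (np.arange(0, shape[1]) + 0.5) * stride
    shift_y = (np.arange(0, shape[0]) + 0.5) * stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.stack([shift_x.ravel(), shift_y.ravel()] * 2, axis=1)  # (K, 4)
    # (K, 1, 4) + (1, A, 4) -> (K*A, 4)
    return (shifts[:, None, :] + anchors[None, :, :]).reshape(-1, 4)

base = np.array([[-16.0, -16.0, 16.0, 16.0]])            # one 32x32 base anchor
print(shift_np((2, 3), stride=16, anchors=base).shape)   # (6, 4): 2*3 cells * 1 anchor
```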
def map_fn(*args, **kwargs):
    """ See https://www.tensorflow.org/api_docs/python/tf/map_fn .
    """
    if "shapes" in kwargs:
        shapes = kwargs.pop("shapes")
        dtype = kwargs.pop("dtype")
        sig = [tensorflow.TensorSpec(shapes[i], dtype=t) for i, t in enumerate(dtype)]

        # Try to use the new feature fn_output_signature in TF 2.3, use fallback if this is not available
        try:
            return tensorflow.map_fn(*args, **kwargs, fn_output_signature=sig)
        except TypeError:
            kwargs["dtype"] = dtype

    return tensorflow.map_fn(*args, **kwargs)
def resize_images(images, size, method='bilinear', align_corners=False):
    """ See https://www.tensorflow.org/versions/r1.14/api_docs/python/tf/image/resize_images .

    Args
        method: The method used for interpolation. One of ('bilinear', 'nearest', 'bicubic', 'area').
    """
    methods = {
        'bilinear': tensorflow.image.ResizeMethod.BILINEAR,
        'nearest' : tensorflow.image.ResizeMethod.NEAREST_NEIGHBOR,
        'bicubic' : tensorflow.image.ResizeMethod.BICUBIC,
        'area'    : tensorflow.image.ResizeMethod.AREA,
    }
    return tensorflow.compat.v1.image.resize_images(images, size, methods[method], align_corners)
#!/usr/bin/env python
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import argparse
import os
import sys
# Allow relative imports when being executed as script.
if __name__ == "__main__" and __package__ is None:
    sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))
    import keras_retinanet.bin  # noqa: F401
    __package__ = "keras_retinanet.bin"
# Change these to absolute imports if you copy this script outside the keras_retinanet package.
from .. import models
from ..utils.config import read_config_file, parse_anchor_parameters, parse_pyramid_levels
from ..utils.gpu import setup_gpu
from ..utils.tf_version import check_tf_version
def parse_args(args):
    parser = argparse.ArgumentParser(description='Script for converting a training model to an inference model.')

    parser.add_argument('model_in', help='The model to convert.')
    parser.add_argument('model_out', help='Path to save the converted model to.')
    parser.add_argument('--backbone', help='The backbone of the model to convert.', default='resnet50')
    parser.add_argument('--no-nms', help='Disables non maximum suppression.', dest='nms', action='store_false')
    parser.add_argument('--no-class-specific-filter', help='Disables class specific filtering.', dest='class_specific_filter', action='store_false')
    parser.add_argument('--config', help='Path to a configuration parameters .ini file.')
    parser.add_argument('--nms-threshold', help='Value for non maximum suppression threshold.', type=float, default=0.5)
    parser.add_argument('--score-threshold', help='Threshold for prefiltering boxes.', type=float, default=0.05)
    parser.add_argument('--max-detections', help='Maximum number of detections to keep.', type=int, default=300)
    parser.add_argument('--parallel-iterations', help='Number of batch items to process in parallel.', type=int, default=32)

    return parser.parse_args(args)
def main(args=None):
    # parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # make sure tensorflow is the minimum required version
    check_tf_version()

    # set modified tf session to avoid using the GPUs
    setup_gpu('cpu')

    # optionally load config parameters
    anchor_parameters = None
    pyramid_levels = None
    if args.config:
        args.config = read_config_file(args.config)
        if 'anchor_parameters' in args.config:
            anchor_parameters = parse_anchor_parameters(args.config)
        if 'pyramid_levels' in args.config:
            pyramid_levels = parse_pyramid_levels(args.config)

    # load the model
    model = models.load_model(args.model_in, backbone_name=args.backbone)

    # check if this is indeed a training model
    models.check_training_model(model)

    # convert the model
    model = models.convert_model(
        model,
        nms=args.nms,
        class_specific_filter=args.class_specific_filter,
        anchor_params=anchor_parameters,
        pyramid_levels=pyramid_levels,
        nms_threshold=args.nms_threshold,
        score_threshold=args.score_threshold,
        max_detections=args.max_detections,
        parallel_iterations=args.parallel_iterations
    )

    # save model
    model.save(args.model_out)


if __name__ == '__main__':
    main()
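Because main() only runs under the __name__ guard, the script can also be driven programmatically. A hypothetical invocation, assuming this file lives at keras_retinanet/bin/convert_model.py (the diff does not name it) and with placeholder paths:

```python
# Hypothetical programmatic use; module path and file paths are assumptions.
from keras_retinanet.bin.convert_model import main

main([
    "snapshots/resnet50_training.h5",   # training model to convert
    "snapshots/resnet50_inference.h5",  # where to write the inference model
])
```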
#!/usr/bin/env python
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import argparse
import os
import sys
# Allow relative imports when being executed as script.
if __name__ == "__main__" and __package__ is None:
    sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))
    import keras_retinanet.bin  # noqa: F401
    __package__ = "keras_retinanet.bin"
# Change these to absolute imports if you copy this script outside the keras_retinanet package.
from .. import models
from ..preprocessing.csv_generator import CSVGenerator
from ..preprocessing.pascal_voc import PascalVocGenerator
from ..utils.anchors import make_shapes_callback
from ..utils.config import read_config_file, parse_anchor_parameters, parse_pyramid_levels
from ..utils.eval import evaluate
from ..utils.gpu import setup_gpu
from ..utils.tf_version import check_tf_version
def create_generator(args, preprocess_image):
    """ Create generators for evaluation.
    """
    common_args = {
        'config'           : args.config,
        'image_min_side'   : args.image_min_side,
        'image_max_side'   : args.image_max_side,
        'no_resize'        : args.no_resize,
        'preprocess_image' : preprocess_image,
        'group_method'     : args.group_method
    }

    if args.dataset_type == 'coco':
        # import here to prevent unnecessary dependency on cocoapi
        from ..preprocessing.coco import CocoGenerator

        validation_generator = CocoGenerator(
            args.coco_path,
            'val2017',
            shuffle_groups=False,
            **common_args
        )
    elif args.dataset_type == 'pascal':
        validation_generator = PascalVocGenerator(
            args.pascal_path,
            'test',
            image_extension=args.image_extension,
            shuffle_groups=False,
            **common_args
        )
    elif args.dataset_type == 'csv':
        validation_generator = CSVGenerator(
            args.annotations,
            args.classes,
            shuffle_groups=False,
            **common_args
        )
    else:
        raise ValueError('Invalid data type received: {}'.format(args.dataset_type))

    return validation_generator
def parse_args(args):
    """ Parse the arguments.
    """
    parser = argparse.ArgumentParser(description='Evaluation script for a RetinaNet network.')
    subparsers = parser.add_subparsers(help='Arguments for specific dataset types.', dest='dataset_type')
    subparsers.required = True

    coco_parser = subparsers.add_parser('coco')
    coco_parser.add_argument('coco_path', help='Path to dataset directory (ie. /tmp/COCO).')

    pascal_parser = subparsers.add_parser('pascal')
    pascal_parser.add_argument('pascal_path', help='Path to dataset directory (ie. /tmp/VOCdevkit).')
    pascal_parser.add_argument('--image-extension', help='Declares the dataset images\' extension.', default='.jpg')

    csv_parser = subparsers.add_parser('csv')
    csv_parser.add_argument('annotations', help='Path to CSV file containing annotations for evaluation.')
    csv_parser.add_argument('classes', help='Path to a CSV file containing class label mapping.')

    parser.add_argument('model', help='Path to RetinaNet model.')
    parser.add_argument('--convert-model', help='Convert the model to an inference model (ie. the input is a training model).', action='store_true')
    parser.add_argument('--backbone', help='The backbone of the model.', default='resnet50')
    parser.add_argument('--gpu', help='Id of the GPU to use (as reported by nvidia-smi).')
    parser.add_argument('--score-threshold', help='Threshold on score to filter detections with (defaults to 0.05).', default=0.05, type=float)
    parser.add_argument('--iou-threshold', help='IoU threshold to count for a positive detection (defaults to 0.5).', default=0.5, type=float)
    parser.add_argument('--max-detections', help='Max detections per image (defaults to 100).', default=100, type=int)
    parser.add_argument('--save-path', help='Path for saving images with detections (doesn\'t work for COCO).')
    parser.add_argument('--image-min-side', help='Rescale the image so the smallest side is min_side.', type=int, default=800)
    parser.add_argument('--image-max-side', help='Rescale the image if the largest side is larger than max_side.', type=int, default=1333)
    parser.add_argument('--no-resize', help="Don't rescale the image.", action='store_true')
    parser.add_argument('--config', help='Path to a configuration parameters .ini file (only used with --convert-model).')
    parser.add_argument('--group-method', help='Determines how images are grouped together.', type=str, default='ratio', choices=['none', 'random', 'ratio'])

    return parser.parse_args(args)
def main(args=None):
    # parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # make sure tensorflow is the minimum required version
    check_tf_version()

    # optionally choose specific GPU
    if args.gpu:
        setup_gpu(args.gpu)

    # make save path if it doesn't exist
    if args.save_path is not None and not os.path.exists(args.save_path):
        os.makedirs(args.save_path)

    # optionally load config parameters
    if args.config:
        args.config = read_config_file(args.config)

    # create the generator
    backbone = models.backbone(args.backbone)
    generator = create_generator(args, backbone.preprocess_image)

    # optionally load anchor parameters
    anchor_params = None
    pyramid_levels = None
    if args.config and 'anchor_parameters' in args.config:
        anchor_params = parse_anchor_parameters(args.config)
    if args.config and 'pyramid_levels' in args.config:
        pyramid_levels = parse_pyramid_levels(args.config)

    # load the model
    print('Loading model, this may take a second...')
    model = models.load_model(args.model, backbone_name=args.backbone)
    generator.compute_shapes = make_shapes_callback(model)

    # optionally convert the model
    if args.convert_model:
        model = models.convert_model(model, anchor_params=anchor_params, pyramid_levels=pyramid_levels)

    # print model summary
    # print(model.summary())

    # start evaluation
    if args.dataset_type == 'coco':
        from ..utils.coco_eval import evaluate_coco
        evaluate_coco(generator, model, args.score_threshold)
    else:
        average_precisions, inference_time = evaluate(
            generator,
            model,
            iou_threshold=args.iou_threshold,
            score_threshold=args.score_threshold,
            max_detections=args.max_detections,
            save_path=args.save_path
        )

        # print evaluation
        total_instances = []
        precisions = []
        for label, (average_precision, num_annotations) in average_precisions.items():
            print('{:.0f} instances of class'.format(num_annotations),
                  generator.label_to_name(label), 'with average precision: {:.4f}'.format(average_precision))
            total_instances.append(num_annotations)
            precisions.append(average_precision)

        if sum(total_instances) == 0:
            print('No test instances found.')
            return

        print('Inference time for {:.0f} images: {:.4f}'.format(generator.size(), inference_time))

        print('mAP using the weighted average of precisions among classes: {:.4f}'.format(sum([a * b for a, b in zip(total_instances, precisions)]) / sum(total_instances)))
        print('mAP: {:.4f}'.format(sum(precisions) / sum(x > 0 for x in total_instances)))


if __name__ == '__main__':
    main()
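A hypothetical run of this script against a CSV dataset, assuming the file lives at keras_retinanet/bin/evaluate.py (the diff does not name it); all paths are placeholders:

```python
# Hypothetical programmatic use; module path and file paths are assumptions.
from keras_retinanet.bin.evaluate import main

main([
    "csv", "val_annotations.csv", "classes.csv",  # dataset type + its files
    "snapshots/resnet50_inference.h5",            # model to evaluate
    "--score-threshold", "0.05",
    "--iou-threshold", "0.5",
])
```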
from .common import * # noqa: F401,F403
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from tensorflow import keras
from ..utils.coco_eval import evaluate_coco
class CocoEval(keras.callbacks.Callback):
    """ Performs COCO evaluation on each epoch.
    """
    def __init__(self, generator, tensorboard=None, threshold=0.05):
        """ CocoEval callback initializer.

        Args
            generator   : The generator used for creating validation data.
            tensorboard : If given, the results will be written to tensorboard.
            threshold   : The score threshold to use.
        """
        self.generator = generator
        self.threshold = threshold
        self.tensorboard = tensorboard

        super(CocoEval, self).__init__()

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}

        coco_tag = ['AP @[ IoU=0.50:0.95 | area=   all | maxDets=100 ]',
                    'AP @[ IoU=0.50      | area=   all | maxDets=100 ]',
                    'AP @[ IoU=0.75      | area=   all | maxDets=100 ]',
                    'AP @[ IoU=0.50:0.95 | area= small | maxDets=100 ]',
                    'AP @[ IoU=0.50:0.95 | area=medium | maxDets=100 ]',
                    'AP @[ IoU=0.50:0.95 | area= large | maxDets=100 ]',
                    'AR @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ]',
                    'AR @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ]',
                    'AR @[ IoU=0.50:0.95 | area=   all | maxDets=100 ]',
                    'AR @[ IoU=0.50:0.95 | area= small | maxDets=100 ]',
                    'AR @[ IoU=0.50:0.95 | area=medium | maxDets=100 ]',
                    'AR @[ IoU=0.50:0.95 | area= large | maxDets=100 ]']
        coco_eval_stats = evaluate_coco(self.generator, self.model, self.threshold)

        if coco_eval_stats is not None:
            for index, result in enumerate(coco_eval_stats):
                logs[coco_tag[index]] = result

            if self.tensorboard:
                import tensorflow as tf
                writer = tf.summary.create_file_writer(self.tensorboard.log_dir)
                with writer.as_default():
                    for index, result in enumerate(coco_eval_stats):
                        tf.summary.scalar('{}. {}'.format(index + 1, coco_tag[index]), result, step=epoch)
                    writer.flush()
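A sketch of how the callback slots into training; the generator, model and TensorBoard callback names are placeholders:

```python
# Hypothetical wiring of the callback into a training loop (names are placeholders).
tensorboard_callback = keras.callbacks.TensorBoard(log_dir="logs")
coco_eval = CocoEval(validation_generator, tensorboard=tensorboard_callback, threshold=0.05)
model.fit(train_generator, epochs=50, callbacks=[coco_eval, tensorboard_callback])
```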
from tensorflow import keras
class RedirectModel(keras.callbacks.Callback):
    """Callback which wraps another callback, but executed on a different model.

    ```python
    model = keras.models.load_model('model.h5')
    model_checkpoint = ModelCheckpoint(filepath='snapshot.h5')
    parallel_model = multi_gpu_model(model, gpus=2)
    parallel_model.fit(X_train, Y_train, callbacks=[RedirectModel(model_checkpoint, model)])
    ```

    Args
        callback : callback to wrap.
        model    : model to use when executing callbacks.
    """

    def __init__(self,
                 callback,
                 model):
        super(RedirectModel, self).__init__()

        self.callback = callback
        self.redirect_model = model

    def on_epoch_begin(self, epoch, logs=None):
        self.callback.on_epoch_begin(epoch, logs=logs)

    def on_epoch_end(self, epoch, logs=None):
        self.callback.on_epoch_end(epoch, logs=logs)

    def on_batch_begin(self, batch, logs=None):
        self.callback.on_batch_begin(batch, logs=logs)

    def on_batch_end(self, batch, logs=None):
        self.callback.on_batch_end(batch, logs=logs)

    def on_train_begin(self, logs=None):
        # overwrite the model with our custom model
        self.callback.set_model(self.redirect_model)
        self.callback.on_train_begin(logs=logs)

    def on_train_end(self, logs=None):
        self.callback.on_train_end(logs=logs)
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from tensorflow import keras
from ..utils.eval import evaluate
class Evaluate(keras.callbacks.Callback):
    """ Evaluation callback for arbitrary datasets.
    """

    def __init__(
        self,
        generator,
        iou_threshold=0.5,
        score_threshold=0.05,
        max_detections=100,
        save_path=None,
        tensorboard=None,
        weighted_average=False,
        verbose=1
    ):
        """ Evaluate a given dataset using a given model at the end of every epoch during training.

        # Arguments
            generator        : The generator that represents the dataset to evaluate.
            iou_threshold    : The threshold used to consider when a detection is positive or negative.
            score_threshold  : The score confidence threshold to use for detections.
            max_detections   : The maximum number of detections to use per image.
            save_path        : The path to save images with visualized detections to.
            tensorboard      : Instance of keras.callbacks.TensorBoard used to log the mAP value.
            weighted_average : Compute the mAP using the weighted average of precisions among classes.
            verbose          : Set the verbosity level, by default this is set to 1.
        """
        self.generator = generator
        self.iou_threshold = iou_threshold
        self.score_threshold = score_threshold
        self.max_detections = max_detections
        self.save_path = save_path
        self.tensorboard = tensorboard
        self.weighted_average = weighted_average
        self.verbose = verbose

        super(Evaluate, self).__init__()

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}

        # run evaluation
        average_precisions, _ = evaluate(
            self.generator,
            self.model,
            iou_threshold=self.iou_threshold,
            score_threshold=self.score_threshold,
            max_detections=self.max_detections,
            save_path=self.save_path
        )

        # compute per class average precision
        total_instances = []
        precisions = []
        for label, (average_precision, num_annotations) in average_precisions.items():
            if self.verbose == 1:
                print('{:.0f} instances of class'.format(num_annotations),
                      self.generator.label_to_name(label), 'with average precision: {:.4f}'.format(average_precision))
            total_instances.append(num_annotations)
            precisions.append(average_precision)
        if self.weighted_average:
            self.mean_ap = sum([a * b for a, b in zip(total_instances, precisions)]) / sum(total_instances)
        else:
            self.mean_ap = sum(precisions) / sum(x > 0 for x in total_instances)

        if self.tensorboard:
            import tensorflow as tf
            writer = tf.summary.create_file_writer(self.tensorboard.log_dir)
            with writer.as_default():
                tf.summary.scalar("mAP", self.mean_ap, step=epoch)
                if self.verbose == 1:
                    for label, (average_precision, num_annotations) in average_precisions.items():
                        tf.summary.scalar("AP_" + self.generator.label_to_name(label), average_precision, step=epoch)
                writer.flush()

        logs['mAP'] = self.mean_ap

        if self.verbose == 1:
            print('mAP: {:.4f}'.format(self.mean_ap))
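A sketch of typical usage; note that this callback is usually wrapped in RedirectModel (defined above) so that evaluation runs on the prediction model rather than the training model. All model and generator names here are placeholders:

```python
# Hypothetical wiring (names are placeholders); RedirectModel is defined above.
evaluation = Evaluate(validation_generator, weighted_average=True)
evaluation = RedirectModel(evaluation, prediction_model)
training_model.fit(train_generator, epochs=50, callbacks=[evaluation])
```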
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from tensorflow import keras
import math
class PriorProbability(keras.initializers.Initializer):
    """ Apply a prior probability to the weights.
    """

    def __init__(self, probability=0.01):
        self.probability = probability

    def get_config(self):
        return {
            'probability': self.probability
        }

    def __call__(self, shape, dtype=None):
        # set bias to -log((1 - p)/p) for foreground
        result = keras.backend.ones(shape, dtype=dtype) * -math.log((1 - self.probability) / self.probability)

        return result
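A quick check of the numbers: with the default probability of 0.01, every bias starts at -log((1 - 0.01) / 0.01) ≈ -4.595, so the initial sigmoid output is 0.01 and each anchor begins by confidently predicting background (the initialization trick from the focal loss paper).

```python
import math

# Check of the bias value used above for p = 0.01.
p = 0.01
bias = -math.log((1 - p) / p)
print(bias)                        # -4.59511...
print(1 / (1 + math.exp(-bias)))   # 0.01 -- sigmoid(bias) recovers p
```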
from ._misc import RegressBoxes, UpsampleLike, Anchors, ClipBoxes # noqa: F401
from .filter_detections import FilterDetections # noqa: F401
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import tensorflow
from tensorflow import keras
from .. import backend
from ..utils import anchors as utils_anchors
import numpy as np
class Anchors(keras.layers.Layer):
    """ Keras layer for generating anchors for a given shape.
    """

    def __init__(self, size, stride, ratios=None, scales=None, *args, **kwargs):
        """ Initializer for an Anchors layer.

        Args
            size: The base size of the anchors to generate.
            stride: The stride of the anchors to generate.
            ratios: The ratios of the anchors to generate (defaults to AnchorParameters.default.ratios).
            scales: The scales of the anchors to generate (defaults to AnchorParameters.default.scales).
        """
        self.size = size
        self.stride = stride
        self.ratios = ratios
        self.scales = scales

        if ratios is None:
            self.ratios = utils_anchors.AnchorParameters.default.ratios
        elif isinstance(ratios, list):
            self.ratios = np.array(ratios)
        if scales is None:
            self.scales = utils_anchors.AnchorParameters.default.scales
        elif isinstance(scales, list):
            self.scales = np.array(scales)

        self.num_anchors = len(self.ratios) * len(self.scales)
        self.anchors = utils_anchors.generate_anchors(
            base_size=self.size,
            ratios=self.ratios,
            scales=self.scales,
        ).astype(np.float32)

        super(Anchors, self).__init__(*args, **kwargs)

    def call(self, inputs, **kwargs):
        features = inputs
        features_shape = keras.backend.shape(features)

        # generate proposals from bbox deltas and shifted anchors
        if keras.backend.image_data_format() == 'channels_first':
            anchors = backend.shift(features_shape[2:4], self.stride, self.anchors)
        else:
            anchors = backend.shift(features_shape[1:3], self.stride, self.anchors)
        anchors = keras.backend.tile(keras.backend.expand_dims(anchors, axis=0), (features_shape[0], 1, 1))

        return anchors

    def compute_output_shape(self, input_shape):
        if None not in input_shape[1:]:
            if keras.backend.image_data_format() == 'channels_first':
                total = np.prod(input_shape[2:4]) * self.num_anchors
            else:
                total = np.prod(input_shape[1:3]) * self.num_anchors
            return (input_shape[0], total, 4)
        else:
            return (input_shape[0], None, 4)

    def get_config(self):
        config = super(Anchors, self).get_config()
        config.update({
            'size'   : self.size,
            'stride' : self.stride,
            'ratios' : self.ratios.tolist(),
            'scales' : self.scales.tolist(),
        })

        return config
class UpsampleLike(keras.layers.Layer):
    """ Keras layer for upsampling a Tensor to be the same shape as another Tensor.
    """

    def call(self, inputs, **kwargs):
        source, target = inputs
        target_shape = keras.backend.shape(target)
        if keras.backend.image_data_format() == 'channels_first':
            source = tensorflow.transpose(source, (0, 2, 3, 1))
            output = backend.resize_images(source, (target_shape[2], target_shape[3]), method='nearest')
            output = tensorflow.transpose(output, (0, 3, 1, 2))
            return output
        else:
            return backend.resize_images(source, (target_shape[1], target_shape[2]), method='nearest')

    def compute_output_shape(self, input_shape):
        if keras.backend.image_data_format() == 'channels_first':
            return (input_shape[0][0], input_shape[0][1]) + input_shape[1][2:4]
        else:
            return (input_shape[0][0],) + input_shape[1][1:3] + (input_shape[0][-1],)
class RegressBoxes(keras.layers.Layer):
    """ Keras layer for applying regression values to boxes.
    """

    def __init__(self, mean=None, std=None, *args, **kwargs):
        """ Initializer for the RegressBoxes layer.

        Args
            mean: The mean value of the regression values which was used for normalization.
            std : The standard deviation of the regression values which was used for normalization.
        """
        if mean is None:
            mean = np.array([0, 0, 0, 0])
        if std is None:
            std = np.array([0.2, 0.2, 0.2, 0.2])

        if isinstance(mean, (list, tuple)):
            mean = np.array(mean)
        elif not isinstance(mean, np.ndarray):
            raise ValueError('Expected mean to be a np.ndarray, list or tuple. Received: {}'.format(type(mean)))

        if isinstance(std, (list, tuple)):
            std = np.array(std)
        elif not isinstance(std, np.ndarray):
            raise ValueError('Expected std to be a np.ndarray, list or tuple. Received: {}'.format(type(std)))

        self.mean = mean
        self.std = std
        super(RegressBoxes, self).__init__(*args, **kwargs)

    def call(self, inputs, **kwargs):
        anchors, regression = inputs
        return backend.bbox_transform_inv(anchors, regression, mean=self.mean, std=self.std)

    def compute_output_shape(self, input_shape):
        return input_shape[0]

    def get_config(self):
        config = super(RegressBoxes, self).get_config()
        config.update({
            'mean': self.mean.tolist(),
            'std' : self.std.tolist(),
        })

        return config
class ClipBoxes(keras.layers.Layer):
    """ Keras layer to clip box values to lie inside a given shape.
    """

    def call(self, inputs, **kwargs):
        image, boxes = inputs
        shape = keras.backend.cast(keras.backend.shape(image), keras.backend.floatx())
        if keras.backend.image_data_format() == 'channels_first':
            _, _, height, width = tensorflow.unstack(shape, axis=0)
        else:
            _, height, width, _ = tensorflow.unstack(shape, axis=0)

        x1, y1, x2, y2 = tensorflow.unstack(boxes, axis=-1)
        x1 = tensorflow.clip_by_value(x1, 0, width - 1)
        y1 = tensorflow.clip_by_value(y1, 0, height - 1)
        x2 = tensorflow.clip_by_value(x2, 0, width - 1)
        y2 = tensorflow.clip_by_value(y2, 0, height - 1)

        return keras.backend.stack([x1, y1, x2, y2], axis=2)

    def compute_output_shape(self, input_shape):
        return input_shape[1]
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import tensorflow
from tensorflow import keras
from .. import backend
def filter_detections(
    boxes,
    classification,
    other                 = [],
    class_specific_filter = True,
    nms                   = True,
    score_threshold       = 0.05,
    max_detections        = 300,
    nms_threshold         = 0.5
):
    """ Filter detections using the boxes and classification values.

    Args
        boxes                 : Tensor of shape (num_boxes, 4) containing the boxes in (x1, y1, x2, y2) format.
        classification        : Tensor of shape (num_boxes, num_classes) containing the classification scores.
        other                 : List of tensors of shape (num_boxes, ...) to filter along with the boxes and classification scores.
        class_specific_filter : Whether to perform filtering per class, or take the best scoring class and filter those.
        nms                   : Flag to enable/disable non maximum suppression.
        score_threshold       : Threshold used to prefilter the boxes with.
        max_detections        : Maximum number of detections to keep.
        nms_threshold         : Threshold for the IoU value to determine when a box should be suppressed.

    Returns
        A list of [boxes, scores, labels, other[0], other[1], ...].
        boxes is shaped (max_detections, 4) and contains the (x1, y1, x2, y2) of the non-suppressed boxes.
        scores is shaped (max_detections,) and contains the scores of the predicted class.
        labels is shaped (max_detections,) and contains the predicted label.
        other[i] is shaped (max_detections, ...) and contains the filtered other[i] data.
        In case there are less than max_detections detections, the tensors are padded with -1's.
    """
    def _filter_detections(scores, labels):
        # threshold based on score
        indices = tensorflow.where(keras.backend.greater(scores, score_threshold))

        if nms:
            filtered_boxes  = tensorflow.gather_nd(boxes, indices)
            filtered_scores = keras.backend.gather(scores, indices)[:, 0]

            # perform NMS
            nms_indices = tensorflow.image.non_max_suppression(filtered_boxes, filtered_scores, max_output_size=max_detections, iou_threshold=nms_threshold)

            # filter indices based on NMS
            indices = keras.backend.gather(indices, nms_indices)

        # add indices to list of all indices
        labels = tensorflow.gather_nd(labels, indices)
        indices = keras.backend.stack([indices[:, 0], labels], axis=1)

        return indices

    if class_specific_filter:
        all_indices = []
        # perform per class filtering
        for c in range(int(classification.shape[1])):
            scores = classification[:, c]
            labels = c * tensorflow.ones((keras.backend.shape(scores)[0],), dtype='int64')
            all_indices.append(_filter_detections(scores, labels))

        # concatenate indices to single tensor
        indices = keras.backend.concatenate(all_indices, axis=0)
    else:
        scores = keras.backend.max(classification, axis=1)
        labels = keras.backend.argmax(classification, axis=1)
        indices = _filter_detections(scores, labels)

    # select top k
    scores              = tensorflow.gather_nd(classification, indices)
    labels              = indices[:, 1]
    scores, top_indices = tensorflow.nn.top_k(scores, k=keras.backend.minimum(max_detections, keras.backend.shape(scores)[0]))

    # filter input using the final set of indices
    indices = keras.backend.gather(indices[:, 0], top_indices)
    boxes   = keras.backend.gather(boxes, indices)
    labels  = keras.backend.gather(labels, top_indices)
    other_  = [keras.backend.gather(o, indices) for o in other]

    # zero pad the outputs
    pad_size = keras.backend.maximum(0, max_detections - keras.backend.shape(scores)[0])
    boxes    = tensorflow.pad(boxes, [[0, pad_size], [0, 0]], constant_values=-1)
    scores   = tensorflow.pad(scores, [[0, pad_size]], constant_values=-1)
    labels   = tensorflow.pad(labels, [[0, pad_size]], constant_values=-1)
    labels   = keras.backend.cast(labels, 'int32')
    other_   = [tensorflow.pad(o, [[0, pad_size]] + [[0, 0] for _ in range(1, len(o.shape))], constant_values=-1) for o in other_]

    # set shapes, since we know what they are
    boxes.set_shape([max_detections, 4])
    scores.set_shape([max_detections])
    labels.set_shape([max_detections])
    for o, s in zip(other_, [list(keras.backend.int_shape(o)) for o in other]):
        o.set_shape([max_detections] + s[1:])

    return [boxes, scores, labels] + other_
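Since the outputs are padded to a fixed max_detections with -1, a consumer has to mask the padding back out. A small NumPy sketch of that post-processing (illustrative only):

```python
import numpy as np

# The padded outputs use -1 as filler; mask on the label to recover
# only the real detections (illustrative shapes, max_detections = 3).
boxes  = np.array([[0, 0, 10, 10], [5, 5, 8, 9], [-1, -1, -1, -1]], dtype=np.float32)
scores = np.array([0.9, 0.6, -1.0], dtype=np.float32)
labels = np.array([3, 0, -1], dtype=np.int32)

valid = labels != -1
print(boxes[valid], scores[valid], labels[valid])
```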
class FilterDetections(keras.layers.Layer):
    """ Keras layer for filtering detections using score threshold and NMS.
    """

    def __init__(
        self,
        nms                   = True,
        class_specific_filter = True,
        nms_threshold         = 0.5,
        score_threshold       = 0.05,
        max_detections        = 300,
        parallel_iterations   = 32,
        **kwargs
    ):
        """ Filters detections using score threshold, NMS and selecting the top-k detections.

        Args
            nms                   : Flag to enable/disable NMS.
            class_specific_filter : Whether to perform filtering per class, or take the best scoring class and filter those.
            nms_threshold         : Threshold for the IoU value to determine when a box should be suppressed.
            score_threshold       : Threshold used to prefilter the boxes with.
            max_detections        : Maximum number of detections to keep.
            parallel_iterations   : Number of batch items to process in parallel.
        """
        self.nms                   = nms
        self.class_specific_filter = class_specific_filter
        self.nms_threshold         = nms_threshold
        self.score_threshold       = score_threshold
        self.max_detections        = max_detections
        self.parallel_iterations   = parallel_iterations
        super(FilterDetections, self).__init__(**kwargs)

    def call(self, inputs, **kwargs):
        """ Constructs the NMS graph.

        Args
            inputs : List of [boxes, classification, other[0], other[1], ...] tensors.
        """
        boxes          = inputs[0]
        classification = inputs[1]
        other          = inputs[2:]

        # wrap nms with our parameters
        def _filter_detections(args):
            boxes          = args[0]
            classification = args[1]
            other          = args[2]

            return filter_detections(
                boxes,
                classification,
                other,
                nms                   = self.nms,
                class_specific_filter = self.class_specific_filter,
                score_threshold       = self.score_threshold,
                max_detections        = self.max_detections,
                nms_threshold         = self.nms_threshold,
            )

        # call filter_detections on each batch
        dtypes = [keras.backend.floatx(), keras.backend.floatx(), 'int32'] + [o.dtype for o in other]
        shapes = [(self.max_detections, 4), (self.max_detections,), (self.max_detections,)]
        shapes.extend([(self.max_detections,) + o.shape[2:] for o in other])
        outputs = backend.map_fn(
            _filter_detections,
            elems=[boxes, classification, other],
            dtype=dtypes,
            shapes=shapes,
            parallel_iterations=self.parallel_iterations,
        )

        return outputs

    def compute_output_shape(self, input_shape):
        """ Computes the output shapes given the input shapes.

        Args
            input_shape : List of input shapes [boxes, classification, other[0], other[1], ...].

        Returns
            List of tuples representing the output shapes:
            [filtered_boxes.shape, filtered_scores.shape, filtered_labels.shape, filtered_other[0].shape, filtered_other[1].shape, ...]
        """
        return [
            (input_shape[0][0], self.max_detections, 4),
            (input_shape[1][0], self.max_detections),
            (input_shape[1][0], self.max_detections),
        ] + [
            tuple([input_shape[i][0], self.max_detections] + list(input_shape[i][2:])) for i in range(2, len(input_shape))
        ]

    def compute_mask(self, inputs, mask=None):
        """ This is required in Keras when there is more than 1 output.
        """
        return (len(inputs) + 1) * [None]

    def get_config(self):
        """ Gets the configuration of this layer.

        Returns
            Dictionary containing the parameters of this layer.
        """
        config = super(FilterDetections, self).get_config()
        config.update({
            'nms'                   : self.nms,
            'class_specific_filter' : self.class_specific_filter,
            'nms_threshold'         : self.nms_threshold,
            'score_threshold'       : self.score_threshold,
            'max_detections'        : self.max_detections,
            'parallel_iterations'   : self.parallel_iterations,
        })

        return config
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import tensorflow
from tensorflow import keras
def focal(alpha=0.25, gamma=2.0, cutoff=0.5):
    """ Create a functor for computing the focal loss.

    Args
        alpha: Scale the focal weight with alpha.
        gamma: Take the power of the focal weight with gamma.
        cutoff: Positive prediction cutoff for soft targets.

    Returns
        A functor that computes the focal loss using the alpha and gamma.
    """
    def _focal(y_true, y_pred):
        """ Compute the focal loss given the target tensor and the predicted tensor.

        As defined in https://arxiv.org/abs/1708.02002

        Args
            y_true: Tensor of target data from the generator with shape (B, N, num_classes).
            y_pred: Tensor of predicted data from the network with shape (B, N, num_classes).

        Returns
            The focal loss of y_pred w.r.t. y_true.
        """
        labels         = y_true[:, :, :-1]
        anchor_state   = y_true[:, :, -1]  # -1 for ignore, 0 for background, 1 for object
        classification = y_pred

        # filter out "ignore" anchors
        indices        = tensorflow.where(keras.backend.not_equal(anchor_state, -1))
        labels         = tensorflow.gather_nd(labels, indices)
        classification = tensorflow.gather_nd(classification, indices)

        # compute the focal loss
        alpha_factor = keras.backend.ones_like(labels) * alpha
        alpha_factor = tensorflow.where(keras.backend.greater(labels, cutoff), alpha_factor, 1 - alpha_factor)
        focal_weight = tensorflow.where(keras.backend.greater(labels, cutoff), 1 - classification, classification)
        focal_weight = alpha_factor * focal_weight ** gamma

        cls_loss = focal_weight * keras.backend.binary_crossentropy(labels, classification)

        # compute the normalizer: the number of positive anchors
        normalizer = tensorflow.where(keras.backend.equal(anchor_state, 1))
        normalizer = keras.backend.cast(keras.backend.shape(normalizer)[0], keras.backend.floatx())
        normalizer = keras.backend.maximum(keras.backend.cast_to_floatx(1.0), normalizer)

        return keras.backend.sum(cls_loss) / normalizer

    return _focal
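To see the down-weighting at work, here is a standalone NumPy check of the weighting above for positive anchors (alpha = 0.25, gamma = 2), not a call into the TF code:

```python
import numpy as np

# Focal weighting for a positive label (label = 1 > cutoff): the loss is
# alpha * (1 - p)^gamma * BCE, so well-classified anchors contribute little.
alpha, gamma = 0.25, 2.0
for p in (0.1, 0.5, 0.9):                # predicted probability for a positive
    bce    = -np.log(p)                  # plain binary cross-entropy
    weight = alpha * (1 - p) ** gamma    # focal weight
    print(p, bce, weight * bce)
# p = 0.9 is scaled by 0.25 * 0.1**2 = 0.0025, while the hard case p = 0.1
# keeps 0.25 * 0.9**2 ~= 0.2 of its cross-entropy.
```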
def smooth_l1(sigma=3.0):
    """ Create a smooth L1 loss functor.

    Args
        sigma: This argument defines the point where the loss changes from L2 to L1.

    Returns
        A functor for computing the smooth L1 loss given target data and predicted data.
    """
    sigma_squared = sigma ** 2

    def _smooth_l1(y_true, y_pred):
        """ Compute the smooth L1 loss of y_pred w.r.t. y_true.

        Args
            y_true: Tensor from the generator of shape (B, N, 5). The last value for each box is the state of the anchor (ignore, negative, positive).
            y_pred: Tensor from the network of shape (B, N, 4).

        Returns
            The smooth L1 loss of y_pred w.r.t. y_true.
        """
        # separate target and state
        regression        = y_pred
        regression_target = y_true[:, :, :-1]
        anchor_state      = y_true[:, :, -1]

        # keep only positive anchors; regression is not computed for ignored or negative anchors
        indices           = tensorflow.where(keras.backend.equal(anchor_state, 1))
        regression        = tensorflow.gather_nd(regression, indices)
        regression_target = tensorflow.gather_nd(regression_target, indices)

        # compute smooth L1 loss
        # f(x) = 0.5 * (sigma * x)^2    if |x| < 1 / sigma^2
        #        |x| - 0.5 / sigma^2    otherwise
        regression_diff = regression - regression_target
        regression_diff = keras.backend.abs(regression_diff)
        regression_loss = tensorflow.where(
            keras.backend.less(regression_diff, 1.0 / sigma_squared),
            0.5 * sigma_squared * keras.backend.pow(regression_diff, 2),
            regression_diff - 0.5 / sigma_squared
        )

        # compute the normalizer: the number of positive anchors
        normalizer = keras.backend.maximum(1, keras.backend.shape(indices)[0])
        normalizer = keras.backend.cast(normalizer, dtype=keras.backend.floatx())
        return keras.backend.sum(regression_loss) / normalizer

    return _smooth_l1
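A quick numeric check that the two branches agree at the switch point |x| = 1/sigma^2 (with the default sigma = 3, so sigma_squared = 9):

```python
# Both branches of the piecewise loss above evaluate to 1/18 at the
# switch point x = 1/9, so the function is continuous there.
sigma_squared = 9.0
x = 1.0 / sigma_squared
quadratic = 0.5 * sigma_squared * x ** 2   # 0.5 * 9 * (1/9)^2 = 1/18
linear    = x - 0.5 / sigma_squared        # 1/9 - 1/18        = 1/18
print(quadratic, linear)
```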
from __future__ import print_function
import sys
class Backbone(object):
    """ This class stores additional information on backbones.
    """

    def __init__(self, backbone):
        # a dictionary mapping custom layer names to the correct classes
        from .. import layers
        from .. import losses
        from .. import initializers
        self.custom_objects = {
            'UpsampleLike'     : layers.UpsampleLike,
            'PriorProbability' : initializers.PriorProbability,
            'RegressBoxes'     : layers.RegressBoxes,
            'FilterDetections' : layers.FilterDetections,
            'Anchors'          : layers.Anchors,
            'ClipBoxes'        : layers.ClipBoxes,
            '_smooth_l1'       : losses.smooth_l1(),
            '_focal'           : losses.focal(),
        }

        self.backbone = backbone
        self.validate()

    def retinanet(self, *args, **kwargs):
        """ Returns a retinanet model using the correct backbone.
        """
        raise NotImplementedError('retinanet method not implemented.')

    def download_imagenet(self):
        """ Downloads ImageNet weights and returns path to weights file.
        """
        raise NotImplementedError('download_imagenet method not implemented.')

    def validate(self):
        """ Checks whether the backbone string is correct.
        """
        raise NotImplementedError('validate method not implemented.')

    def preprocess_image(self, inputs):
        """ Takes as input an image and prepares it for being passed through the network.

        Having this function in Backbone allows other backbones to define a specific preprocessing step.
        """
        raise NotImplementedError('preprocess_image method not implemented.')
def backbone(backbone_name):
    """ Returns a backbone object for the given backbone.
    """
    if 'densenet' in backbone_name:
        from .densenet import DenseNetBackbone as b
    elif 'seresnext' in backbone_name or 'seresnet' in backbone_name or 'senet' in backbone_name:
        from .senet import SeBackbone as b
    elif 'resnet' in backbone_name:
        from .resnet import ResNetBackbone as b
    elif 'mobilenet' in backbone_name:
        from .mobilenet import MobileNetBackbone as b
    elif 'vgg' in backbone_name:
        from .vgg import VGGBackbone as b
    elif 'EfficientNet' in backbone_name:
        from .effnet import EfficientNetBackbone as b
    else:
        raise NotImplementedError('Backbone class for \'{}\' not implemented.'.format(backbone_name))

    return b(backbone_name)
def load_model(filepath, backbone_name='resnet50'):
    """ Loads a retinanet model using the correct custom objects.

    Args
        filepath: one of the following:
            - string, path to the saved model, or
            - h5py.File object from which to load the model
        backbone_name : Backbone with which the model was trained.

    Returns
        A keras.models.Model object.

    Raises
        ImportError: if h5py is not available.
        ValueError: In case of an invalid savefile.
    """
    from tensorflow import keras
    return keras.models.load_model(filepath, custom_objects=backbone(backbone_name).custom_objects)
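Typical usage, mirroring the example script earlier in this diff (the snapshot path is a placeholder):

```python
# Placeholder path; convert_model is only needed for training snapshots.
from keras_retinanet import models

model = models.load_model("snapshots/resnet50_coco_best_v2.1.0.h5", backbone_name="resnet50")
# model = models.convert_model(model)  # if the snapshot is a training model
```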
def convert_model(model, nms=True, class_specific_filter=True, anchor_params=None, **kwargs):
    """ Converts a training model to an inference model.

    Args
        model                 : A retinanet training model.
        nms                   : Boolean, whether to add NMS filtering to the converted model.
        class_specific_filter : Whether to use class specific filtering or filter for the best scoring class only.
        anchor_params         : Anchor parameters object. If omitted, default values are used.
        **kwargs              : Inference and minimal retinanet model settings.

    Returns
        A keras.models.Model object.

    Raises
        ImportError: if h5py is not available.
        ValueError: In case of an invalid savefile.
    """
    from .retinanet import retinanet_bbox
    return retinanet_bbox(model=model, nms=nms, class_specific_filter=class_specific_filter, anchor_params=anchor_params, **kwargs)


def assert_training_model(model):
    """ Assert that the model is a training model.
    """
    assert(all(output in model.output_names for output in ['regression', 'classification'])), \
        "Input is not a training model (no 'regression' and 'classification' outputs were found, outputs are: {}).".format(model.output_names)


def check_training_model(model):
    """ Check that model is a training model and exit otherwise.
    """
    try:
        assert_training_model(model)
    except AssertionError as e:
        print(e, file=sys.stderr)
        sys.exit(1)
"""
Copyright 2018 vidosits (https://github.com/vidosits/)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from tensorflow import keras
from . import retinanet
from . import Backbone
from ..utils.image import preprocess_image
allowed_backbones = {
    'densenet121': ([6, 12, 24, 16], keras.applications.densenet.DenseNet121),
    'densenet169': ([6, 12, 32, 32], keras.applications.densenet.DenseNet169),
    'densenet201': ([6, 12, 48, 32], keras.applications.densenet.DenseNet201),
}
class DenseNetBackbone(Backbone):
    """ Describes backbone information and provides utility functions.
    """

    def retinanet(self, *args, **kwargs):
        """ Returns a retinanet model using the correct backbone.
        """
        return densenet_retinanet(*args, backbone=self.backbone, **kwargs)

    def download_imagenet(self):
        """ Download pre-trained weights for the specified backbone name.

        This name is in the format {backbone}_weights_tf_dim_ordering_tf_kernels_notop
        where backbone is the densenet + number of layers (e.g. densenet121).
        For more info check the explanation from the keras densenet script itself:
        https://github.com/keras-team/keras/blob/master/keras/applications/densenet.py
        """
        origin    = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.8/'
        file_name = '{}_weights_tf_dim_ordering_tf_kernels_notop.h5'

        # load weights
        if keras.backend.image_data_format() == 'channels_first':
            raise ValueError('Weights for "channels_first" format are not available.')

        weights_url = origin + file_name.format(self.backbone)
        return keras.utils.get_file(file_name.format(self.backbone), weights_url, cache_subdir='models')

    def validate(self):
        """ Checks whether the backbone string is correct.
        """
        backbone = self.backbone.split('_')[0]
        if backbone not in allowed_backbones:
            raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format(backbone, allowed_backbones.keys()))

    def preprocess_image(self, inputs):
        """ Takes as input an image and prepares it for being passed through the network.
        """
        return preprocess_image(inputs, mode='tf')
def densenet_retinanet(num_classes, backbone='densenet121', inputs=None, modifier=None, **kwargs):
    """ Constructs a retinanet model using a densenet backbone.

    Args
        num_classes: Number of classes to predict.
        backbone: Which backbone to use (one of ('densenet121', 'densenet169', 'densenet201')).
        inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)).
        modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example).

    Returns
        RetinaNet model with a DenseNet backbone.
    """
    # choose default input
    if inputs is None:
        inputs = keras.layers.Input((None, None, 3))

    blocks, creator = allowed_backbones[backbone]
    model = creator(input_tensor=inputs, include_top=False, pooling=None, weights=None)

    # get last conv layer from the end of each dense block
    layer_outputs = [model.get_layer(name='conv{}_block{}_concat'.format(idx + 2, block_num)).output for idx, block_num in enumerate(blocks)]

    # create the densenet backbone; layer_outputs contains 4 layers
    model = keras.models.Model(inputs=inputs, outputs=layer_outputs, name=model.name)

    # invoke modifier if given
    if modifier:
        model = modifier(model)

    # create the full model
    backbone_layers = {
        'C2': model.outputs[0],
        'C3': model.outputs[1],
        'C4': model.outputs[2],
        'C5': model.outputs[3]
    }

    model = retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=backbone_layers, **kwargs)

    return model
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from tensorflow import keras
from . import retinanet
from . import Backbone
import efficientnet.keras as efn
class EfficientNetBackbone(Backbone):
""" Describes backbone information and provides utility functions.
"""
def __init__(self, backbone):
super(EfficientNetBackbone, self).__init__(backbone)
self.preprocess_image_func = None
def retinanet(self, *args, **kwargs):
""" Returns a retinanet model using the correct backbone.
"""
return effnet_retinanet(*args, backbone=self.backbone, **kwargs)
def download_imagenet(self):
""" Downloads ImageNet weights and returns path to weights file.
"""
from efficientnet.weights import IMAGENET_WEIGHTS_PATH
from efficientnet.weights import IMAGENET_WEIGHTS_HASHES
model_name = 'efficientnet-b' + self.backbone[-1]
file_name = model_name + '_weights_tf_dim_ordering_tf_kernels_autoaugment_notop.h5'
file_hash = IMAGENET_WEIGHTS_HASHES[model_name][1]
weights_path = keras.utils.get_file(file_name, IMAGENET_WEIGHTS_PATH + file_name, cache_subdir='models', file_hash=file_hash)
return weights_path
def validate(self):
""" Checks whether the backbone string is correct.
"""
allowed_backbones = ['EfficientNetB0', 'EfficientNetB1', 'EfficientNetB2', 'EfficientNetB3', 'EfficientNetB4',
'EfficientNetB5', 'EfficientNetB6', 'EfficientNetB7']
backbone = self.backbone.split('_')[0]
if backbone not in allowed_backbones:
raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format(backbone, allowed_backbones))
def preprocess_image(self, inputs):
""" Takes as input an image and prepares it for being passed through the network.
"""
return efn.preprocess_input(inputs)
def effnet_retinanet(num_classes, backbone='EfficientNetB0', inputs=None, modifier=None, **kwargs):
""" Constructs a retinanet model using a resnet backbone.
Args
num_classes: Number of classes to predict.
backbone: Which backbone to use (one of ('resnet50', 'resnet101', 'resnet152')).
inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)).
modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example).
Returns
RetinaNet model with a ResNet backbone.
"""
# choose default input
if inputs is None:
if keras.backend.image_data_format() == 'channels_first':
inputs = keras.layers.Input(shape=(3, None, None))
else:
# inputs = keras.layers.Input(shape=(224, 224, 3))
inputs = keras.layers.Input(shape=(None, None, 3))
# choose the EfficientNet variant by name (validate() has already vetted it)
if not hasattr(efn, backbone):
    raise ValueError('Backbone (\'{}\') is invalid.'.format(backbone))
model = getattr(efn, backbone)(input_tensor=inputs, include_top=False, weights=None)
# get the last activation from the end of each block [28x28, 14x14, 7x7]
layer_outputs = ['block4a_expand_activation', 'block6a_expand_activation', 'top_activation']
layer_outputs = [
model.get_layer(name=layer_outputs[0]).output, # 28x28
model.get_layer(name=layer_outputs[1]).output, # 14x14
model.get_layer(name=layer_outputs[2]).output, # 7x7
]
# create the efficientnet backbone
model = keras.models.Model(inputs=inputs, outputs=layer_outputs, name=model.name)
# invoke modifier if given
if modifier:
model = modifier(model)
# C2 not provided
backbone_layers = {
'C3': model.outputs[0],
'C4': model.outputs[1],
'C5': model.outputs[2]
}
# create the full model
return retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=backbone_layers, **kwargs)
def EfficientNetB0_retinanet(num_classes, inputs=None, **kwargs):
return effnet_retinanet(num_classes=num_classes, backbone='EfficientNetB0', inputs=inputs, **kwargs)
def EfficientNetB1_retinanet(num_classes, inputs=None, **kwargs):
return effnet_retinanet(num_classes=num_classes, backbone='EfficientNetB1', inputs=inputs, **kwargs)
def EfficientNetB2_retinanet(num_classes, inputs=None, **kwargs):
return effnet_retinanet(num_classes=num_classes, backbone='EfficientNetB2', inputs=inputs, **kwargs)
def EfficientNetB3_retinanet(num_classes, inputs=None, **kwargs):
return effnet_retinanet(num_classes=num_classes, backbone='EfficientNetB3', inputs=inputs, **kwargs)
def EfficientNetB4_retinanet(num_classes, inputs=None, **kwargs):
return effnet_retinanet(num_classes=num_classes, backbone='EfficientNetB4', inputs=inputs, **kwargs)
def EfficientNetB5_retinanet(num_classes, inputs=None, **kwargs):
return effnet_retinanet(num_classes=num_classes, backbone='EfficientNetB5', inputs=inputs, **kwargs)
def EfficientNetB6_retinanet(num_classes, inputs=None, **kwargs):
return effnet_retinanet(num_classes=num_classes, backbone='EfficientNetB6', inputs=inputs, **kwargs)
def EfficientNetB7_retinanet(num_classes, inputs=None, **kwargs):
return effnet_retinanet(num_classes=num_classes, backbone='EfficientNetB7', inputs=inputs, **kwargs)
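A hedged inference sketch for the EfficientNet variant (the dummy image and `num_classes` are illustrative; the training model's outputs are the regression and classification heads):
```
import numpy as np

backbone = EfficientNetBackbone('EfficientNetB0')
model = backbone.retinanet(num_classes=20)
# preprocess_image delegates to the efficientnet package's own input scaling
image = np.random.randint(0, 256, size=(512, 512, 3)).astype('float32')
batch = np.expand_dims(backbone.preprocess_image(image), axis=0)
regression, classification = model.predict(batch)
```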
"""
Copyright 2017-2018 lvaleriu (https://github.com/lvaleriu/)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from tensorflow import keras
from ..utils.image import preprocess_image
from . import retinanet
from . import Backbone
class MobileNetBackbone(Backbone):
""" Describes backbone information and provides utility functions.
"""
allowed_backbones = ['mobilenet128', 'mobilenet160', 'mobilenet192', 'mobilenet224']
def retinanet(self, *args, **kwargs):
""" Returns a retinanet model using the correct backbone.
"""
return mobilenet_retinanet(*args, backbone=self.backbone, **kwargs)
def download_imagenet(self):
""" Download pre-trained weights for the specified backbone name.
This name is in the format mobilenet{rows}_{alpha}, where rows is the
input resolution the weights were trained at and alpha is the width multiplier.
For more info check the explanation from the keras mobilenet script itself.
"""
alpha = float(self.backbone.split('_')[1])
rows = int(self.backbone.split('_')[0].replace('mobilenet', ''))
# load weights
if keras.backend.image_data_format() == 'channels_first':
raise ValueError('Weights for "channels_first" format are not available.')
if alpha == 1.0:
alpha_text = '1_0'
elif alpha == 0.75:
alpha_text = '7_5'
elif alpha == 0.50:
alpha_text = '5_0'
else:  # remaining supported width multiplier (alpha == 0.25)
alpha_text = '2_5'
model_name = 'mobilenet_{}_{}_tf_no_top.h5'.format(alpha_text, rows)
weights_url = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.6/' + model_name
weights_path = keras.utils.get_file(model_name, weights_url, cache_subdir='models')
return weights_path
def validate(self):
""" Checks whether the backbone string is correct.
"""
backbone = self.backbone.split('_')[0]
if backbone not in MobileNetBackbone.allowed_backbones:
raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format(backbone, MobileNetBackbone.allowed_backbones))
def preprocess_image(self, inputs):
""" Takes as input an image and prepares it for being passed through the network.
"""
return preprocess_image(inputs, mode='tf')
def mobilenet_retinanet(num_classes, backbone='mobilenet224_1.0', inputs=None, modifier=None, **kwargs):
""" Constructs a retinanet model using a mobilenet backbone.
Args
num_classes: Number of classes to predict.
backbone: Which backbone to use (one of ('mobilenet128', 'mobilenet160', 'mobilenet192', 'mobilenet224')).
inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)).
modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example).
Returns
RetinaNet model with a MobileNet backbone.
"""
alpha = float(backbone.split('_')[1])
# choose default input
if inputs is None:
inputs = keras.layers.Input((None, None, 3))
backbone = keras.applications.mobilenet.MobileNet(input_tensor=inputs, alpha=alpha, include_top=False, pooling=None, weights=None)
# create the full model
layer_names = ['conv_pw_5_relu', 'conv_pw_11_relu', 'conv_pw_13_relu']
layer_outputs = [backbone.get_layer(name).output for name in layer_names]
backbone = keras.models.Model(inputs=inputs, outputs=layer_outputs, name=backbone.name)
# invoke modifier if given
if modifier:
backbone = modifier(backbone)
# C2 not provided
backbone_layers = {
'C3': backbone.outputs[0],
'C4': backbone.outputs[1],
'C5': backbone.outputs[2]
}
return retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=backbone_layers, **kwargs)
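The backbone string follows the `mobilenet{rows}_{alpha}` convention used above; a small illustration of how `download_imagenet` decomposes it:
```
# Illustrative only: decompose the default backbone string.
backbone = 'mobilenet224_1.0'
rows = int(backbone.split('_')[0].replace('mobilenet', ''))  # 224
alpha = float(backbone.split('_')[1])                        # 1.0
# -> resolves to the release asset 'mobilenet_1_0_224_tf_no_top.h5'
```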
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from tensorflow import keras
import keras_resnet
import keras_resnet.models
from . import retinanet
from . import Backbone
from ..utils.image import preprocess_image
class ResNetBackbone(Backbone):
"""Describes backbone information and provides utility functions."""
def __init__(self, backbone):
super(ResNetBackbone, self).__init__(backbone)
self.custom_objects.update(keras_resnet.custom_objects)
def retinanet(self, *args, **kwargs):
"""Returns a retinanet model using the correct backbone."""
return resnet_retinanet(*args, backbone=self.backbone, **kwargs)
def download_imagenet(self):
"""Downloads ImageNet weights and returns path to weights file."""
resnet_filename = "ResNet-{}-model.keras.h5"
resnet_resource = (
"https://github.com/fizyr/keras-models/releases/download/v0.0.1/{}".format(
resnet_filename
)
)
depth = int(self.backbone.replace("resnet", ""))
filename = resnet_filename.format(depth)
resource = resnet_resource.format(depth)
if depth == 50:
checksum = "3e9f4e4f77bbe2c9bec13b53ee1c2319"
elif depth == 101:
checksum = "05dc86924389e5b401a9ea0348a3213c"
elif depth == 152:
    checksum = "6ee11ef2b135592f8031058820bb9e71"
else:
    raise ValueError("Unknown ResNet depth ({}).".format(depth))
return keras.utils.get_file(
filename, resource, cache_subdir="models", md5_hash=checksum
)
def validate(self):
"""Checks whether the backbone string is correct."""
allowed_backbones = ["resnet50", "resnet101", "resnet152"]
backbone = self.backbone.split("_")[0]
if backbone not in allowed_backbones:
raise ValueError(
"Backbone ('{}') not in allowed backbones ({}).".format(
backbone, allowed_backbones
)
)
def preprocess_image(self, inputs):
"""Takes as input an image and prepares it for being passed through the network."""
return preprocess_image(inputs, mode="caffe")
def resnet_retinanet(
num_classes, backbone="resnet50", inputs=None, modifier=None, **kwargs
):
"""Constructs a retinanet model using a resnet backbone.
Args
num_classes: Number of classes to predict.
backbone: Which backbone to use (one of ('resnet50', 'resnet101', 'resnet152')).
inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)).
modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example).
Returns
RetinaNet model with a ResNet backbone.
"""
# choose default input
if inputs is None:
if keras.backend.image_data_format() == "channels_first":
inputs = keras.layers.Input(shape=(3, None, None))
else:
inputs = keras.layers.Input(shape=(None, None, 3))
# create the resnet backbone
if backbone == "resnet50":
resnet = keras_resnet.models.ResNet50(inputs, include_top=False, freeze_bn=True)
elif backbone == "resnet101":
resnet = keras_resnet.models.ResNet101(
inputs, include_top=False, freeze_bn=True
)
elif backbone == "resnet152":
resnet = keras_resnet.models.ResNet152(
inputs, include_top=False, freeze_bn=True
)
else:
raise ValueError("Backbone ('{}') is invalid.".format(backbone))
# invoke modifier if given
if modifier:
resnet = modifier(resnet)
# create the full model
# resnet.outputs contains 4 layers
backbone_layers = {
"C2": resnet.outputs[0],
"C3": resnet.outputs[1],
"C4": resnet.outputs[2],
"C5": resnet.outputs[3],
}
return retinanet.retinanet(
inputs=inputs,
num_classes=num_classes,
backbone_layers=backbone_layers,
**kwargs
)
def resnet50_retinanet(num_classes, inputs=None, **kwargs):
return resnet_retinanet(
num_classes=num_classes, backbone="resnet50", inputs=inputs, **kwargs
)
def resnet101_retinanet(num_classes, inputs=None, **kwargs):
return resnet_retinanet(
num_classes=num_classes, backbone="resnet101", inputs=inputs, **kwargs
)
def resnet152_retinanet(num_classes, inputs=None, **kwargs):
return resnet_retinanet(
num_classes=num_classes, backbone="resnet152", inputs=inputs, **kwargs
)
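A hedged sketch of the `modifier` hook mentioned in the docstrings above: freeze every backbone layer before the FPN and submodels are attached, a common fine-tuning setup:
```
def freeze_backbone(model):
    # mark all backbone layers as non-trainable; the heads added later stay trainable
    for layer in model.layers:
        layer.trainable = False
    return model

model = resnet50_retinanet(num_classes=80, modifier=freeze_backbone)
```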
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from tensorflow import keras
from . import retinanet
from . import Backbone
from classification_models.keras import Classifiers
class SeBackbone(Backbone):
""" Describes backbone information and provides utility functions.
"""
def __init__(self, backbone):
super(SeBackbone, self).__init__(backbone)
_, self.preprocess_image_func = Classifiers.get(self.backbone)
def retinanet(self, *args, **kwargs):
""" Returns a retinanet model using the correct backbone.
"""
return senet_retinanet(*args, backbone=self.backbone, **kwargs)
def download_imagenet(self):
""" Downloads ImageNet weights and returns path to weights file.
"""
from classification_models.weights import WEIGHTS_COLLECTION
weights_path = None
for el in WEIGHTS_COLLECTION:
if el['model'] == self.backbone and not el['include_top']:
weights_path = keras.utils.get_file(el['name'], el['url'], cache_subdir='models', file_hash=el['md5'])
if weights_path is None:
raise ValueError('Unable to find imagenet weights for backbone {}!'.format(self.backbone))
return weights_path
def validate(self):
""" Checks whether the backbone string is correct.
"""
allowed_backbones = ['seresnet18', 'seresnet34', 'seresnet50', 'seresnet101', 'seresnet152',
'seresnext50', 'seresnext101', 'senet154']
backbone = self.backbone.split('_')[0]
if backbone not in allowed_backbones:
raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format(backbone, allowed_backbones))
def preprocess_image(self, inputs):
""" Takes as input an image and prepares it for being passed through the network.
"""
return self.preprocess_image_func(inputs)
def senet_retinanet(num_classes, backbone='seresnext50', inputs=None, modifier=None, **kwargs):
""" Constructs a retinanet model using a resnet backbone.
Args
num_classes: Number of classes to predict.
backbone: Which backbone to use (one of ('resnet50', 'resnet101', 'resnet152')).
inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)).
modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example).
Returns
RetinaNet model with a ResNet backbone.
"""
# choose default input
if inputs is None:
if keras.backend.image_data_format() == 'channels_first':
inputs = keras.layers.Input(shape=(3, None, None))
else:
# inputs = keras.layers.Input(shape=(224, 224, 3))
inputs = keras.layers.Input(shape=(None, None, 3))
classifier, _ = Classifiers.get(backbone)
model = classifier(input_tensor=inputs, include_top=False, weights=None)
# get last conv layer from the end of each block [28x28, 14x14, 7x7]
if backbone == 'seresnet18' or backbone == 'seresnet34':
layer_outputs = ['stage3_unit1_relu1', 'stage4_unit1_relu1', 'relu1']
elif backbone == 'seresnet50':
layer_outputs = ['activation_36', 'activation_66', 'activation_81']
elif backbone == 'seresnet101':
layer_outputs = ['activation_36', 'activation_151', 'activation_166']
elif backbone == 'seresnet152':
layer_outputs = ['activation_56', 'activation_236', 'activation_251']
elif backbone == 'seresnext50':
layer_outputs = ['activation_37', 'activation_67', 'activation_81']
elif backbone == 'seresnext101':
layer_outputs = ['activation_37', 'activation_152', 'activation_166']
elif backbone == 'senet154':
layer_outputs = ['activation_59', 'activation_239', 'activation_253']
else:
raise ValueError('Backbone (\'{}\') is invalid.'.format(backbone))
layer_outputs = [
model.get_layer(name=layer_outputs[0]).output, # 28x28
model.get_layer(name=layer_outputs[1]).output, # 14x14
model.get_layer(name=layer_outputs[2]).output, # 7x7
]
# create the senet backbone
model = keras.models.Model(inputs=inputs, outputs=layer_outputs, name=model.name)
# invoke modifier if given
if modifier:
model = modifier(model)
# C2 not provided
backbone_layers = {
'C3': model.outputs[0],
'C4': model.outputs[1],
'C5': model.outputs[2]
}
# create the full model
return retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=backbone_layers, **kwargs)
def seresnet18_retinanet(num_classes, inputs=None, **kwargs):
return senet_retinanet(num_classes=num_classes, backbone='seresnet18', inputs=inputs, **kwargs)
def seresnet34_retinanet(num_classes, inputs=None, **kwargs):
return senet_retinanet(num_classes=num_classes, backbone='seresnet34', inputs=inputs, **kwargs)
def seresnet50_retinanet(num_classes, inputs=None, **kwargs):
return senet_retinanet(num_classes=num_classes, backbone='seresnet50', inputs=inputs, **kwargs)
def seresnet101_retinanet(num_classes, inputs=None, **kwargs):
return senet_retinanet(num_classes=num_classes, backbone='seresnet101', inputs=inputs, **kwargs)
def seresnet152_retinanet(num_classes, inputs=None, **kwargs):
return senet_retinanet(num_classes=num_classes, backbone='seresnet152', inputs=inputs, **kwargs)
def seresnext50_retinanet(num_classes, inputs=None, **kwargs):
return senet_retinanet(num_classes=num_classes, backbone='seresnext50', inputs=inputs, **kwargs)
def seresnext101_retinanet(num_classes, inputs=None, **kwargs):
return senet_retinanet(num_classes=num_classes, backbone='seresnext101', inputs=inputs, **kwargs)
def senet154_retinanet(num_classes, inputs=None, **kwargs):
return senet_retinanet(num_classes=num_classes, backbone='senet154', inputs=inputs, **kwargs)
from tensorflow import keras
from .. import initializers
from .. import layers
from ..utils.anchors import AnchorParameters
from . import assert_training_model
from . import retinanet
def custom_classification_model(
num_classes,
num_anchors,
pyramid_feature_size=256,
prior_probability=0.01,
classification_feature_size=256,
name='classification_submodel'
):
# set input
if keras.backend.image_data_format() == "channels_first":
inputs = keras.layers.Input(shape=(pyramid_feature_size, None, None))
else:
inputs = keras.layers.Input(shape=(None, None, pyramid_feature_size))
outputs = inputs
# conv options shared by the layers below (the original omitted this definition;
# values assumed to mirror retinanet.default_classification_model upstream)
options = {'kernel_size': 3, 'strides': 1, 'padding': 'same'}
# stack of 3 intermediate conv layers
for i in range(3):
# output of each intermediate layer
outputs = keras.layers.Conv2D(
filters=classification_feature_size,
activation="relu",
name="pyramid_classification_{}".format(i),
kernel_initializer=keras.initializers.RandomNormal(
mean=0.0, stddev=0.01, seed=None
), # initializer drawing weights from a normal distribution
bias_initializer="zeros",
**options
)(outputs)
# the final layer is a separate conv producing num_classes * num_anchors filters
outputs = keras.layers.Conv2D(
filters=num_classes * num_anchors,
kernel_initializer=keras.initializers.RandomNormal(
mean=0.0, stddev=0.01, seed=None
),
bias_initializer=initializers.PriorProbability(probability=prior_probability),
name="pyramid_classification",
**options
)(outputs)
# reshape output and apply sigmoid
if keras.backend.image_data_format() == "channels_first":
outputs = keras.layers.Permute(
(2, 3, 1), name="pyramid_classification_permute"
)(outputs)
# reshape: flatten the 2-D spatial grid into a 1-D anchor axis
outputs = keras.layers.Reshape(
(-1, num_classes), name="pyramid_classification_reshape"
)(outputs)
# output layer activation : sigmoid
outputs = keras.layers.Activation("sigmoid", name="pyramid_classification_sigmoid")(
outputs
)
return keras.models.Model(inputs=inputs, outputs=outputs, name=name)
def custom_regression_model(num_values, num_anchors, pyramid_feature_size=256, regression_feature_size=256, name='regression_submodel'):
if num_anchors is None:
num_anchors = AnchorParameters.default.num_anchors()
model = retinanet.default_regression_model(num_values, num_anchors, pyramid_feature_size, regression_feature_size, name)
return model
def custom_submodels(num_classes, num_anchors):
if num_anchors is None:
num_anchors = AnchorParameters.default.num_anchors()
return [
("regression", custom_regression_model(4, num_anchors)),
("classification", custom_classification_model(num_classes, num_anchors)),
]
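A hedged sketch of how these custom submodels would be wired in (it assumes `retinanet.retinanet` accepts a `submodels` argument, as it does upstream, and `backbone_layers` stands in for the C3/C4/C5 dict produced by any backbone above):
```
num_classes = 10  # illustrative
inputs = keras.layers.Input((None, None, 3))
# backbone_layers = {'C3': ..., 'C4': ..., 'C5': ...}  # taken from a backbone model
model = retinanet.retinanet(
    inputs=inputs,
    num_classes=num_classes,
    backbone_layers=backbone_layers,
    submodels=custom_submodels(num_classes, num_anchors=None),  # None -> default 9 anchors
)
```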
"""
Copyright 2017-2018 cgratie (https://github.com/cgratie/)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from tensorflow import keras
from . import retinanet
from . import Backbone
from ..utils.image import preprocess_image
class VGGBackbone(Backbone):
""" Describes backbone information and provides utility functions.
"""
def retinanet(self, *args, **kwargs):
""" Returns a retinanet model using the correct backbone.
"""
return vgg_retinanet(*args, backbone=self.backbone, **kwargs)
def download_imagenet(self):
""" Downloads ImageNet weights and returns path to weights file.
Weights can be downloaded at https://github.com/fizyr/keras-models/releases .
"""
if self.backbone == 'vgg16':
resource = keras.applications.vgg16.vgg16.WEIGHTS_PATH_NO_TOP
checksum = '6d6bbae143d832006294945121d1f1fc'
elif self.backbone == 'vgg19':
resource = keras.applications.vgg19.vgg19.WEIGHTS_PATH_NO_TOP
checksum = '253f8cb515780f3b799900260a226db6'
else:
raise ValueError("Backbone '{}' not recognized.".format(self.backbone))
return keras.utils.get_file(
'{}_weights_tf_dim_ordering_tf_kernels_notop.h5'.format(self.backbone),
resource,
cache_subdir='models',
file_hash=checksum
)
def validate(self):
""" Checks whether the backbone string is correct.
"""
allowed_backbones = ['vgg16', 'vgg19']
if self.backbone not in allowed_backbones:
raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format(self.backbone, allowed_backbones))
def preprocess_image(self, inputs):
""" Takes as input an image and prepares it for being passed through the network.
"""
return preprocess_image(inputs, mode='caffe')
def vgg_retinanet(num_classes, backbone='vgg16', inputs=None, modifier=None, **kwargs):
""" Constructs a retinanet model using a vgg backbone.
Args
num_classes: Number of classes to predict.
backbone: Which backbone to use (one of ('vgg16', 'vgg19')).
inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)).
modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example).
Returns
RetinaNet model with a VGG backbone.
"""
# choose default input
if inputs is None:
inputs = keras.layers.Input(shape=(None, None, 3))
# create the vgg backbone
if backbone == 'vgg16':
vgg = keras.applications.VGG16(input_tensor=inputs, include_top=False, weights=None)
elif backbone == 'vgg19':
vgg = keras.applications.VGG19(input_tensor=inputs, include_top=False, weights=None)
else:
raise ValueError("Backbone '{}' not recognized.".format(backbone))
if modifier:
vgg = modifier(vgg)
# create the full model
layer_names = ["block3_pool", "block4_pool", "block5_pool"]
layer_outputs = [vgg.get_layer(name).output for name in layer_names]
# C2 not provided
backbone_layers = {
'C3': layer_outputs[0],
'C4': layer_outputs[1],
'C5': layer_outputs[2]
}
return retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=backbone_layers, **kwargs)
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from ..preprocessing.generator import Generator
from ..utils.image import read_image_bgr
import os
import numpy as np
from pycocotools.coco import COCO
class CocoGenerator(Generator):
""" Generate data from the COCO dataset.
See https://github.com/cocodataset/cocoapi/tree/master/PythonAPI for more information.
"""
def __init__(self, data_dir, set_name, **kwargs):
""" Initialize a COCO data generator.
Args
data_dir: Path to where the COCO dataset is stored.
set_name: Name of the set to parse.
"""
self.data_dir = data_dir
self.set_name = set_name
self.coco = COCO(os.path.join(data_dir, 'annotations', 'instances_' + set_name + '.json'))
self.image_ids = self.coco.getImgIds()
self.load_classes()
super(CocoGenerator, self).__init__(**kwargs)
def load_classes(self):
""" Loads the class to label mapping (and inverse) for COCO.
"""
# load class names (name -> label)
categories = self.coco.loadCats(self.coco.getCatIds())
categories.sort(key=lambda x: x['id'])
self.classes = {}
self.coco_labels = {}
self.coco_labels_inverse = {}
for c in categories:
self.coco_labels[len(self.classes)] = c['id']
self.coco_labels_inverse[c['id']] = len(self.classes)
self.classes[c['name']] = len(self.classes)
# also load the reverse (label -> name)
self.labels = {}
for key, value in self.classes.items():
self.labels[value] = key
def size(self):
""" Size of the COCO dataset.
"""
return len(self.image_ids)
def num_classes(self):
""" Number of classes in the dataset. For COCO this is 80.
"""
return len(self.classes)
def has_label(self, label):
""" Return True if label is a known label.
"""
return label in self.labels
def has_name(self, name):
""" Returns True if name is a known class.
"""
return name in self.classes
def name_to_label(self, name):
""" Map name to label.
"""
return self.classes[name]
def label_to_name(self, label):
""" Map label to name.
"""
return self.labels[label]
def coco_label_to_label(self, coco_label):
""" Map COCO label to the label as used in the network.
COCO has some gaps in the order of labels. The highest label is 90, but there are 80 classes.
"""
return self.coco_labels_inverse[coco_label]
def coco_label_to_name(self, coco_label):
""" Map COCO label to name.
"""
return self.label_to_name(self.coco_label_to_label(coco_label))
def label_to_coco_label(self, label):
""" Map label as used by the network to labels as used by COCO.
"""
return self.coco_labels[label]
def image_path(self, image_index):
""" Returns the image path for image_index.
"""
image_info = self.coco.loadImgs(self.image_ids[image_index])[0]
path = os.path.join(self.data_dir, 'images', self.set_name, image_info['file_name'])
return path
def image_aspect_ratio(self, image_index):
""" Compute the aspect ratio for an image with image_index.
"""
image = self.coco.loadImgs(self.image_ids[image_index])[0]
return float(image['width']) / float(image['height'])
def load_image(self, image_index):
""" Load an image at the image_index.
"""
path = self.image_path(image_index)
return read_image_bgr(path)
def load_annotations(self, image_index):
""" Load annotations for an image_index.
"""
# get ground truth annotations
annotations_ids = self.coco.getAnnIds(imgIds=self.image_ids[image_index], iscrowd=False)
annotations = {'labels': np.empty((0,)), 'bboxes': np.empty((0, 4))}
# some images appear to miss annotations (like image with id 257034)
if len(annotations_ids) == 0:
return annotations
# parse annotations
coco_annotations = self.coco.loadAnns(annotations_ids)
for idx, a in enumerate(coco_annotations):
# some annotations have basically no width / height, skip them
if a['bbox'][2] < 1 or a['bbox'][3] < 1:
continue
annotations['labels'] = np.concatenate([annotations['labels'], [self.coco_label_to_label(a['category_id'])]], axis=0)
annotations['bboxes'] = np.concatenate([annotations['bboxes'], [[
a['bbox'][0],
a['bbox'][1],
a['bbox'][0] + a['bbox'][2],
a['bbox'][1] + a['bbox'][3],
]]], axis=0)
return annotations
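Hedged usage sketch; per `__init__` and `image_path` above, the generator expects the layout `{data_dir}/annotations/instances_{set_name}.json` and `{data_dir}/images/{set_name}/` (the path and set name below are hypothetical):
```
generator = CocoGenerator('/data/coco', 'val2017')  # hypothetical path / set name
print(generator.size(), generator.num_classes())    # e.g. 5000 80
annotations = generator.load_annotations(0)         # {'labels': (N,), 'bboxes': (N, 4)}
```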
"""
Copyright 2017-2018 yhenon (https://github.com/yhenon/)
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from .generator import Generator
from ..utils.image import read_image_bgr
import numpy as np
from PIL import Image
from six import raise_from
import csv
import sys
import os.path
from collections import OrderedDict
def _parse(value, function, fmt):
"""
Parse a string into a value, and format a nice ValueError if it fails.
Returns `function(value)`.
Any `ValueError` raised is caught and a new `ValueError` is raised
with message `fmt.format(e)`, where `e` is the caught `ValueError`.
"""
try:
return function(value)
except ValueError as e:
raise_from(ValueError(fmt.format(e)), None)
def _read_classes(csv_reader):
""" Parse the classes file given by csv_reader.
"""
result = OrderedDict()
for line, row in enumerate(csv_reader):
line += 1
try:
class_name, class_id = row
except ValueError:
raise_from(ValueError('line {}: format should be \'class_name,class_id\''.format(line)), None)
class_id = _parse(class_id, int, 'line {}: malformed class ID: {{}}'.format(line))
if class_name in result:
raise ValueError('line {}: duplicate class name: \'{}\''.format(line, class_name))
result[class_name] = class_id
return result
def _read_annotations(csv_reader, classes):
""" Read annotations from the csv_reader.
"""
result = OrderedDict()
for line, row in enumerate(csv_reader):
line += 1
try:
img_file, x1, y1, x2, y2, class_name = row[:6]
except ValueError:
raise_from(ValueError('line {}: format should be \'img_file,x1,y1,x2,y2,class_name\' or \'img_file,,,,,\''.format(line)), None)
if img_file not in result:
result[img_file] = []
# If a row contains only an image path, it's an image without annotations.
if (x1, y1, x2, y2, class_name) == ('', '', '', '', ''):
continue
x1 = _parse(x1, int, 'line {}: malformed x1: {{}}'.format(line))
y1 = _parse(y1, int, 'line {}: malformed y1: {{}}'.format(line))
x2 = _parse(x2, int, 'line {}: malformed x2: {{}}'.format(line))
y2 = _parse(y2, int, 'line {}: malformed y2: {{}}'.format(line))
# Check that the bounding box is valid.
if x2 <= x1:
raise ValueError('line {}: x2 ({}) must be higher than x1 ({})'.format(line, x2, x1))
if y2 <= y1:
raise ValueError('line {}: y2 ({}) must be higher than y1 ({})'.format(line, y2, y1))
# check if the current class name is correctly present
if class_name not in classes:
raise ValueError('line {}: unknown class name: \'{}\' (classes: {})'.format(line, class_name, classes))
result[img_file].append({'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2, 'class': class_name})
return result
def _open_for_csv(path):
""" Open a file with flags suitable for csv.reader.
This means mode 'rb' on Python 2 and mode 'r' with
newline='' ("universal newlines") on Python 3.
"""
if sys.version_info[0] < 3:
return open(path, 'rb')
else:
return open(path, 'r', newline='')
class CSVGenerator(Generator):
""" Generate data for a custom CSV dataset.
See https://github.com/fizyr/keras-retinanet#csv-datasets for more information.
"""
def __init__(
self,
csv_data_file,
csv_class_file,
base_dir=None,
**kwargs
):
""" Initialize a CSV data generator.
Args
csv_data_file: Path to the CSV annotations file.
csv_class_file: Path to the CSV classes file.
base_dir: Directory w.r.t. where the files are to be searched (defaults to the directory containing the csv_data_file).
"""
self.image_names = []
self.image_data = {}
self.base_dir = base_dir
# Take base_dir from annotations file if not explicitly specified.
if self.base_dir is None:
self.base_dir = os.path.dirname(csv_data_file)
# parse the provided class file
try:
with _open_for_csv(csv_class_file) as file:
self.classes = _read_classes(csv.reader(file, delimiter=','))
except ValueError as e:
raise_from(ValueError('invalid CSV class file: {}: {}'.format(csv_class_file, e)), None)
self.labels = {}
for key, value in self.classes.items():
self.labels[value] = key
# csv with img_path, x1, y1, x2, y2, class_name
try:
with _open_for_csv(csv_data_file) as file:
self.image_data = _read_annotations(csv.reader(file, delimiter=','), self.classes)
except ValueError as e:
raise_from(ValueError('invalid CSV annotations file: {}: {}'.format(csv_data_file, e)), None)
self.image_names = list(self.image_data.keys())
super(CSVGenerator, self).__init__(**kwargs)
def size(self):
""" Size of the dataset.
"""
return len(self.image_names)
def num_classes(self):
""" Number of classes in the dataset.
"""
return max(self.classes.values()) + 1
def has_label(self, label):
""" Return True if label is a known label.
"""
return label in self.labels
def has_name(self, name):
""" Returns True if name is a known class.
"""
return name in self.classes
def name_to_label(self, name):
""" Map name to label.
"""
return self.classes[name]
def label_to_name(self, label):
""" Map label to name.
"""
return self.labels[label]
def image_path(self, image_index):
""" Returns the image path for image_index.
"""
return os.path.join(self.base_dir, self.image_names[image_index])
def image_aspect_ratio(self, image_index):
""" Compute the aspect ratio for an image with image_index.
"""
# PIL is fast for metadata
image = Image.open(self.image_path(image_index))
return float(image.width) / float(image.height)
def load_image(self, image_index):
""" Load an image at the image_index.
"""
return read_image_bgr(self.image_path(image_index))
def load_annotations(self, image_index):
""" Load annotations for an image_index.
"""
path = self.image_names[image_index]
annotations = {'labels': np.empty((0,)), 'bboxes': np.empty((0, 4))}
for idx, annot in enumerate(self.image_data[path]):
annotations['labels'] = np.concatenate((annotations['labels'], [self.name_to_label(annot['class'])]))
annotations['bboxes'] = np.concatenate((annotations['bboxes'], [[
float(annot['x1']),
float(annot['y1']),
float(annot['x2']),
float(annot['y2']),
]]))
return annotations
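Example input files matching the parsers above (all values illustrative); a row with empty coordinates marks an image without annotations:
```
# classes.csv:
#   cow,0
#   cat,1
#
# annotations.csv:
#   img_001.jpg,96,12,320,240,cow
#   img_002.jpg,22,5,89,84,cat
#   img_003.jpg,,,,,
generator = CSVGenerator('annotations.csv', 'classes.csv')
```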
"""
Copyright 2017-2018 lvaleriu (https://github.com/lvaleriu/)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import csv
import os.path
import numpy as np
from PIL import Image
from .generator import Generator
from ..utils.image import read_image_bgr
kitti_classes = {
'Car': 0,
'Van': 1,
'Truck': 2,
'Pedestrian': 3,
'Person_sitting': 4,
'Cyclist': 5,
'Tram': 6,
'Misc': 7,
'DontCare': 7
}
class KittiGenerator(Generator):
""" Generate data for a KITTI dataset.
See http://www.cvlibs.net/datasets/kitti/ for more information.
"""
def __init__(
self,
base_dir,
subset='train',
**kwargs
):
""" Initialize a KITTI data generator.
Args
base_dir: Path to the KITTI dataset root (containing the subset directories with 'images' and 'labels').
subset: The subset to generate data for (defaults to 'train').
"""
self.base_dir = base_dir
label_dir = os.path.join(self.base_dir, subset, 'labels')
image_dir = os.path.join(self.base_dir, subset, 'images')
"""
1 type Describes the type of object: 'Car', 'Van', 'Truck',
'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram',
'Misc' or 'DontCare'
1 truncated Float from 0 (non-truncated) to 1 (truncated), where
truncated refers to the object leaving image boundaries
1 occluded Integer (0,1,2,3) indicating occlusion state:
0 = fully visible, 1 = partly occluded
2 = largely occluded, 3 = unknown
1 alpha Observation angle of object, ranging [-pi..pi]
4 bbox 2D bounding box of object in the image (0-based index):
contains left, top, right, bottom pixel coordinates
3 dimensions 3D object dimensions: height, width, length (in meters)
3 location 3D object location x,y,z in camera coordinates (in meters)
1 rotation_y Rotation ry around Y-axis in camera coordinates [-pi..pi]
"""
self.labels = {}
self.classes = kitti_classes
for name, label in self.classes.items():
self.labels[label] = name
self.image_data = dict()
self.images = []
for i, fn in enumerate(os.listdir(label_dir)):
label_fp = os.path.join(label_dir, fn)
image_fp = os.path.join(image_dir, fn.replace('.txt', '.png'))
self.images.append(image_fp)
fieldnames = ['type', 'truncated', 'occluded', 'alpha', 'left', 'top', 'right', 'bottom', 'dh', 'dw', 'dl',
'lx', 'ly', 'lz', 'ry']
with open(label_fp, 'r') as csv_file:
reader = csv.DictReader(csv_file, delimiter=' ', fieldnames=fieldnames)
boxes = []
for line, row in enumerate(reader):
label = row['type']
cls_id = kitti_classes[label]
annotation = {'cls_id': cls_id, 'x1': row['left'], 'y1': row['top'], 'x2': row['right'], 'y2': row['bottom']}
boxes.append(annotation)
self.image_data[i] = boxes
super(KittiGenerator, self).__init__(**kwargs)
def size(self):
""" Size of the dataset.
"""
return len(self.images)
def num_classes(self):
""" Number of classes in the dataset.
"""
return max(self.classes.values()) + 1
def has_label(self, label):
""" Return True if label is a known label.
"""
return label in self.labels
def has_name(self, name):
""" Returns True if name is a known class.
"""
return name in self.classes
def name_to_label(self, name):
""" Map name to label.
"""
raise NotImplementedError()
def label_to_name(self, label):
""" Map label to name.
"""
return self.labels[label]
def image_aspect_ratio(self, image_index):
""" Compute the aspect ratio for an image with image_index.
"""
# PIL is fast for metadata
image = Image.open(self.images[image_index])
return float(image.width) / float(image.height)
def image_path(self, image_index):
""" Get the path to an image.
"""
return self.images[image_index]
def load_image(self, image_index):
""" Load an image at the image_index.
"""
return read_image_bgr(self.image_path(image_index))
def load_annotations(self, image_index):
""" Load annotations for an image_index.
"""
image_data = self.image_data[image_index]
annotations = {'labels': np.empty((len(image_data),)), 'bboxes': np.empty((len(image_data), 4))}
for idx, ann in enumerate(image_data):
annotations['bboxes'][idx, 0] = float(ann['x1'])
annotations['bboxes'][idx, 1] = float(ann['y1'])
annotations['bboxes'][idx, 2] = float(ann['x2'])
annotations['bboxes'][idx, 3] = float(ann['y2'])
annotations['labels'][idx] = int(ann['cls_id'])
return annotations
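For reference, one line of a KITTI label file mapped onto the `fieldnames` list above (values illustrative); only `type` and the four 2D bbox fields feed the detection targets:
```
# type truncated occluded alpha  left    top     right   bottom  dh   dw   dl   lx    ly   lz    ry
# Car  0.00      0        -1.58  587.01  173.33  614.12  200.12  1.65 1.67 3.64 -0.65 1.71 46.70 -1.59
```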
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from ..preprocessing.generator import Generator
from ..utils.image import read_image_bgr
import os
import numpy as np
from six import raise_from
from PIL import Image
try:
import xml.etree.cElementTree as ET
except ImportError:
import xml.etree.ElementTree as ET
voc_classes = {
'aeroplane' : 0,
'bicycle' : 1,
'bird' : 2,
'boat' : 3,
'bottle' : 4,
'bus' : 5,
'car' : 6,
'cat' : 7,
'chair' : 8,
'cow' : 9,
'diningtable' : 10,
'dog' : 11,
'horse' : 12,
'motorbike' : 13,
'person' : 14,
'pottedplant' : 15,
'sheep' : 16,
'sofa' : 17,
'train' : 18,
'tvmonitor' : 19
}
def _findNode(parent, name, debug_name=None, parse=None):
if debug_name is None:
debug_name = name
result = parent.find(name)
if result is None:
raise ValueError('missing element \'{}\''.format(debug_name))
if parse is not None:
try:
return parse(result.text)
except ValueError as e:
raise_from(ValueError('illegal value for \'{}\': {}'.format(debug_name, e)), None)
return result
class PascalVocGenerator(Generator):
""" Generate data for a Pascal VOC dataset.
See http://host.robots.ox.ac.uk/pascal/VOC/ for more information.
"""
def __init__(
self,
data_dir,
set_name,
classes=voc_classes,
image_extension='.jpg',
skip_truncated=False,
skip_difficult=False,
**kwargs
):
""" Initialize a Pascal VOC data generator.
Args
data_dir: Path to the Pascal VOC dataset root (containing 'ImageSets', 'JPEGImages' and 'Annotations').
set_name: Name of the image set to parse (e.g. 'trainval').
"""
self.data_dir = data_dir
self.set_name = set_name
self.classes = classes
self.image_names = [line.strip().split(None, 1)[0] for line in open(os.path.join(data_dir, 'ImageSets', 'Main', set_name + '.txt')).readlines()]
self.image_extension = image_extension
self.skip_truncated = skip_truncated
self.skip_difficult = skip_difficult
self.labels = {}
for key, value in self.classes.items():
self.labels[value] = key
super(PascalVocGenerator, self).__init__(**kwargs)
def size(self):
""" Size of the dataset.
"""
return len(self.image_names)
def num_classes(self):
""" Number of classes in the dataset.
"""
return len(self.classes)
def has_label(self, label):
""" Return True if label is a known label.
"""
return label in self.labels
def has_name(self, name):
""" Returns True if name is a known class.
"""
return name in self.classes
def name_to_label(self, name):
""" Map name to label.
"""
return self.classes[name]
def label_to_name(self, label):
""" Map label to name.
"""
return self.labels[label]
def image_aspect_ratio(self, image_index):
""" Compute the aspect ratio for an image with image_index.
"""
path = os.path.join(self.data_dir, 'JPEGImages', self.image_names[image_index] + self.image_extension)
image = Image.open(path)
return float(image.width) / float(image.height)
def image_path(self, image_index):
""" Get the path to an image.
"""
return os.path.join(self.data_dir, 'JPEGImages', self.image_names[image_index] + self.image_extension)
def load_image(self, image_index):
""" Load an image at the image_index.
"""
return read_image_bgr(self.image_path(image_index))
def __parse_annotation(self, element):
""" Parse an annotation given an XML element.
"""
truncated = _findNode(element, 'truncated', parse=int)
difficult = _findNode(element, 'difficult', parse=int)
class_name = _findNode(element, 'name').text
if class_name not in self.classes:
raise ValueError('class name \'{}\' not found in classes: {}'.format(class_name, list(self.classes.keys())))
box = np.zeros((4,))
label = self.name_to_label(class_name)
bndbox = _findNode(element, 'bndbox')
box[0] = _findNode(bndbox, 'xmin', 'bndbox.xmin', parse=float) - 1
box[1] = _findNode(bndbox, 'ymin', 'bndbox.ymin', parse=float) - 1
box[2] = _findNode(bndbox, 'xmax', 'bndbox.xmax', parse=float) - 1
box[3] = _findNode(bndbox, 'ymax', 'bndbox.ymax', parse=float) - 1
return truncated, difficult, box, label
def __parse_annotations(self, xml_root):
""" Parse all annotations under the xml_root.
"""
annotations = {'labels': np.empty((len(xml_root.findall('object')),)), 'bboxes': np.empty((len(xml_root.findall('object')), 4))}
for i, element in enumerate(xml_root.iter('object')):
try:
truncated, difficult, box, label = self.__parse_annotation(element)
except ValueError as e:
raise_from(ValueError('could not parse object #{}: {}'.format(i, e)), None)
if truncated and self.skip_truncated:
continue
if difficult and self.skip_difficult:
continue
annotations['bboxes'][i, :] = box
annotations['labels'][i] = label
return annotations
def load_annotations(self, image_index):
""" Load annotations for an image_index.
"""
filename = self.image_names[image_index] + '.xml'
try:
tree = ET.parse(os.path.join(self.data_dir, 'Annotations', filename))
return self.__parse_annotations(tree.getroot())
except ET.ParseError as e:
raise_from(ValueError('invalid annotations file: {}: {}'.format(filename, e)), None)
except ValueError as e:
raise_from(ValueError('invalid annotations file: {}: {}'.format(filename, e)), None)
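A minimal `<object>` element consumed by `__parse_annotation` above (illustrative); note the `-1` offsets in the parser, which convert VOC's 1-based pixel coordinates to 0-based:
```
# <object>
#   <name>dog</name>
#   <truncated>0</truncated>
#   <difficult>0</difficult>
#   <bndbox>
#     <xmin>48</xmin><ymin>240</ymin><xmax>195</xmax><ymax>371</ymax>
#   </bndbox>
# </object>
```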
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from pycocotools.cocoeval import COCOeval
from tensorflow import keras
import numpy as np
import json
import progressbar
assert(callable(progressbar.progressbar)), "Using wrong progressbar module, install 'progressbar2' instead."
def evaluate_coco(generator, model, threshold=0.05):
""" Use the pycocotools to evaluate a COCO model on a dataset.
Args
generator : The generator for generating the evaluation data.
model : The model to evaluate.
threshold : The score threshold to use.
"""
# start collecting results
results = []
image_ids = []
for index in progressbar.progressbar(range(generator.size()), prefix='COCO evaluation: '):
image = generator.load_image(index)
image = generator.preprocess_image(image)
image, scale = generator.resize_image(image)
if keras.backend.image_data_format() == 'channels_first':
image = image.transpose((2, 0, 1))
# run network
boxes, scores, labels = model.predict_on_batch(np.expand_dims(image, axis=0))
# correct boxes for image scale
boxes /= scale
# change to (x, y, w, h) (MS COCO standard)
boxes[:, :, 2] -= boxes[:, :, 0]
boxes[:, :, 3] -= boxes[:, :, 1]
# compute predicted labels and scores
for box, score, label in zip(boxes[0], scores[0], labels[0]):
# scores are sorted, so we can break
if score < threshold:
break
# append detection for each positively labeled class
image_result = {
'image_id' : generator.image_ids[index],
'category_id' : generator.label_to_coco_label(label),
'score' : float(score),
'bbox' : box.tolist(),
}
# append detection to results
results.append(image_result)
# append image to list of processed images
image_ids.append(generator.image_ids[index])
if not len(results):
return
# write output
json.dump(results, open('{}_bbox_results.json'.format(generator.set_name), 'w'), indent=4)
json.dump(image_ids, open('{}_processed_image_ids.json'.format(generator.set_name), 'w'), indent=4)
# load results in COCO evaluation tool
coco_true = generator.coco
coco_pred = coco_true.loadRes('{}_bbox_results.json'.format(generator.set_name))
# run COCO evaluation
coco_eval = COCOeval(coco_true, coco_pred, 'bbox')
coco_eval.params.imgIds = image_ids
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
return coco_eval.stats
import warnings
def label_color(label):
""" Return a color from a set of predefined colors. Contains 80 colors in total.
Args
label: The label to get the color for.
Returns
A list of three values representing an RGB color.
If no color is defined for a certain label, the color green is returned and a warning is printed.
"""
if label < len(colors):
return colors[label]
else:
warnings.warn('Label {} has no color, returning default.'.format(label))
return (0, 255, 0)
"""
Generated using:
```
colors = [list((matplotlib.colors.hsv_to_rgb([x, 1.0, 1.0]) * 255).astype(int)) for x in np.arange(0, 1, 1.0 / 80)]
shuffle(colors)
pprint(colors)
```
"""
colors = [
[31 , 0 , 255] ,
[0 , 159 , 255] ,
[255 , 95 , 0] ,
[255 , 19 , 0] ,
[255 , 0 , 0] ,
[255 , 38 , 0] ,
[0 , 255 , 25] ,
[255 , 0 , 133] ,
[255 , 172 , 0] ,
[108 , 0 , 255] ,
[0 , 82 , 255] ,
[0 , 255 , 6] ,
[255 , 0 , 152] ,
[223 , 0 , 255] ,
[12 , 0 , 255] ,
[0 , 255 , 178] ,
[108 , 255 , 0] ,
[184 , 0 , 255] ,
[255 , 0 , 76] ,
[146 , 255 , 0] ,
[51 , 0 , 255] ,
[0 , 197 , 255] ,
[255 , 248 , 0] ,
[255 , 0 , 19] ,
[255 , 0 , 38] ,
[89 , 255 , 0] ,
[127 , 255 , 0] ,
[255 , 153 , 0] ,
[0 , 255 , 255] ,
[0 , 255 , 216] ,
[0 , 255 , 121] ,
[255 , 0 , 248] ,
[70 , 0 , 255] ,
[0 , 255 , 159] ,
[0 , 216 , 255] ,
[0 , 6 , 255] ,
[0 , 63 , 255] ,
[31 , 255 , 0] ,
[255 , 57 , 0] ,
[255 , 0 , 210] ,
[0 , 255 , 102] ,
[242 , 255 , 0] ,
[255 , 191 , 0] ,
[0 , 255 , 63] ,
[255 , 0 , 95] ,
[146 , 0 , 255] ,
[184 , 255 , 0] ,
[255 , 114 , 0] ,
[0 , 255 , 235] ,
[255 , 229 , 0] ,
[0 , 178 , 255] ,
[255 , 0 , 114] ,
[255 , 0 , 57] ,
[0 , 140 , 255] ,
[0 , 121 , 255] ,
[12 , 255 , 0] ,
[255 , 210 , 0] ,
[0 , 255 , 44] ,
[165 , 255 , 0] ,
[0 , 25 , 255] ,
[0 , 255 , 140] ,
[0 , 101 , 255] ,
[0 , 255 , 82] ,
[223 , 255 , 0] ,
[242 , 0 , 255] ,
[89 , 0 , 255] ,
[165 , 0 , 255] ,
[70 , 255 , 0] ,
[255 , 0 , 172] ,
[255 , 76 , 0] ,
[203 , 255 , 0] ,
[204 , 0 , 255] ,
[255 , 0 , 229] ,
[255 , 133 , 0] ,
[127 , 0 , 255] ,
[0 , 235 , 255] ,
[0 , 255 , 197] ,
[255 , 0 , 191] ,
[0 , 44 , 255] ,
[50 , 255 , 0]
]
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Sergey Karayev
# --------------------------------------------------------
cimport cython
import numpy as np
cimport numpy as np
def compute_overlap(
np.ndarray[double, ndim=2] boxes,
np.ndarray[double, ndim=2] query_boxes
):
"""
Args
boxes: (N, 4) ndarray of float
query_boxes: (K, 4) ndarray of float
Returns
overlaps: (N, K) ndarray of IoU overlap between boxes and query_boxes
"""
cdef unsigned int N = boxes.shape[0]
cdef unsigned int K = query_boxes.shape[0]
cdef np.ndarray[double, ndim=2] overlaps = np.zeros((N, K), dtype=np.float64)
cdef double iw, ih, box_area
cdef double ua
cdef unsigned int k, n
for k in range(K):
box_area = (
(query_boxes[k, 2] - query_boxes[k, 0]) *
(query_boxes[k, 3] - query_boxes[k, 1])
)
for n in range(N):
iw = (
min(boxes[n, 2], query_boxes[k, 2]) -
max(boxes[n, 0], query_boxes[k, 0])
)
if iw > 0:
ih = (
min(boxes[n, 3], query_boxes[k, 3]) -
max(boxes[n, 1], query_boxes[k, 1])
)
if ih > 0:
ua = np.float64(
(boxes[n, 2] - boxes[n, 0]) *
(boxes[n, 3] - boxes[n, 1]) +
box_area - iw * ih
)
overlaps[n, k] = iw * ih / ua
return overlaps
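A pure-NumPy reference sketch equivalent to the Cython kernel above; useful for sanity-checking results when the compiled extension is unavailable, though not a drop-in replacement for its performance:
```
import numpy as np

def compute_overlap_np(boxes, query_boxes):
    """(N, 4) x (K, 4) -> (N, K) IoU matrix, mirroring compute_overlap."""
    iw = (np.minimum(boxes[:, None, 2], query_boxes[None, :, 2])
          - np.maximum(boxes[:, None, 0], query_boxes[None, :, 0])).clip(min=0)
    ih = (np.minimum(boxes[:, None, 3], query_boxes[None, :, 3])
          - np.maximum(boxes[:, None, 1], query_boxes[None, :, 1])).clip(min=0)
    intersection = iw * ih
    area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    query_area = (query_boxes[:, 2] - query_boxes[:, 0]) * (query_boxes[:, 3] - query_boxes[:, 1])
    union = area[:, None] + query_area[None, :] - intersection
    return intersection / union
```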
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import configparser
import numpy as np
from tensorflow import keras
from ..utils.anchors import AnchorParameters
def read_config_file(config_path):
config = configparser.ConfigParser()
with open(config_path, 'r') as file:
config.read_file(file)
assert 'anchor_parameters' in config, \
"Malformed config file. Verify that it contains the anchor_parameters section."
config_keys = set(config['anchor_parameters'])
default_keys = set(AnchorParameters.default.__dict__.keys())
assert config_keys <= default_keys, \
"Malformed config file. These keys are not valid: {}".format(config_keys - default_keys)
if 'pyramid_levels' in config:
assert 'levels' in config['pyramid_levels'], "Malformed config file. The pyramid_levels section must contain a 'levels' key."
return config
def parse_anchor_parameters(config):
ratios = np.array(list(map(float, config['anchor_parameters']['ratios'].split(' '))), keras.backend.floatx())
scales = np.array(list(map(float, config['anchor_parameters']['scales'].split(' '))), keras.backend.floatx())
sizes = list(map(int, config['anchor_parameters']['sizes'].split(' ')))
strides = list(map(int, config['anchor_parameters']['strides'].split(' ')))
assert (len(sizes) == len(strides)), "sizes and strides should have an equal number of values"
return AnchorParameters(sizes, strides, ratios, scales)
def parse_pyramid_levels(config):
levels = list(map(int, config['pyramid_levels']['levels'].split(' ')))
return levels
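An illustrative config file these parsers accept, followed by the corresponding calls (the path and all values are examples; the anchor keys must match `AnchorParameters.default`):
```
# config.ini (illustrative):
#   [anchor_parameters]
#   sizes   = 32 64 128 256 512
#   strides = 8 16 32 64 128
#   ratios  = 0.5 1 2
#   scales  = 1 1.2 1.6
#
#   [pyramid_levels]
#   levels = 3 4 5 6 7
config = read_config_file('config.ini')
anchor_params = parse_anchor_parameters(config)
pyramid_levels = parse_pyramid_levels(config)
```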
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from .anchors import compute_overlap
from .visualization import draw_detections, draw_annotations
from tensorflow import keras
import numpy as np
import os
import time
import cv2
import progressbar
assert(callable(progressbar.progressbar)), "Using wrong progressbar module, install 'progressbar2' instead."
def _compute_ap(recall, precision):
""" Compute the average precision, given the recall and precision curves.
Code originally from https://github.com/rbgirshick/py-faster-rcnn.
# Arguments
recall: The recall curve (list).
precision: The precision curve (list).
# Returns
The average precision as computed in py-faster-rcnn.
"""
# correct AP calculation
# first append sentinel values at the end
mrec = np.concatenate(([0.], recall, [1.]))
mpre = np.concatenate(([0.], precision, [0.]))
# compute the precision envelope
for i in range(mpre.size - 1, 0, -1):
mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
# to calculate area under PR curve, look for points
# where X axis (recall) changes value
i = np.where(mrec[1:] != mrec[:-1])[0]
# and sum (\Delta recall) * prec
ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
return ap
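A tiny worked example of the interpolation above (values illustrative): with three ground-truth boxes and detections that recall two of them with one false positive in between, the interpolated AP works out as follows:
```
import numpy as np

recall = np.array([1/3, 1/3, 2/3])
precision = np.array([1.0, 0.5, 2/3])
# precision envelope -> [1.0, 1.0, 2/3]; AP = (1/3)*1.0 + (1/3)*(2/3) ~= 0.556
print(_compute_ap(recall, precision))
```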
def _get_detections(generator, model, score_threshold=0.05, max_detections=100, save_path=None):
""" Get the detections from the model using the generator.
The result is a list of lists such that the size is:
all_detections[num_images][num_classes] = detections[num_detections, 5],
where each detection holds (x1, y1, x2, y2, score).
# Arguments
generator : The generator used to run images through the model.
model : The model to run on the images.
score_threshold : The score confidence threshold to use.
max_detections : The maximum number of detections to use per image.
save_path : The path to save the images with visualized detections to.
# Returns
A list of lists containing the detections for each image in the generator.
"""
# one slot per class id, so the all_detections[i][label] assignment below indexes correctly
all_detections = [[None for i in range(generator.num_classes())] for j in range(generator.size())]
all_inferences = [None for i in range(generator.size())]
for i in progressbar.progressbar(range(generator.size()), prefix='Running network: '):
raw_image = generator.load_image(i)
image, scale = generator.resize_image(raw_image.copy())
image = generator.preprocess_image(image)
if keras.backend.image_data_format() == 'channels_first':
image = image.transpose((2, 0, 1))
# run network
start = time.time()
boxes, scores, labels = model.predict_on_batch(np.expand_dims(image, axis=0))[:3]
inference_time = time.time() - start
# correct boxes for image scale
boxes /= scale
# select indices which have a score above the threshold
indices = np.where(scores[0, :] > score_threshold)[0]
# select those scores
scores = scores[0][indices]
# find the order with which to sort the scores
scores_sort = np.argsort(-scores)[:max_detections]
# select detections
image_boxes = boxes[0, indices[scores_sort], :]
image_scores = scores[scores_sort]
image_labels = labels[0, indices[scores_sort]]
image_detections = np.concatenate([image_boxes, np.expand_dims(image_scores, axis=1), np.expand_dims(image_labels, axis=1)], axis=1)
if save_path is not None:
draw_annotations(raw_image, generator.load_annotations(i), label_to_name=generator.label_to_name)
draw_detections(raw_image, image_boxes, image_scores, image_labels, label_to_name=generator.label_to_name, score_threshold=score_threshold)
cv2.imwrite(os.path.join(save_path, '{}.png'.format(i)), raw_image)
# copy detections to all_detections
for label in range(generator.num_classes()):
if not generator.has_label(label):
continue
all_detections[i][label] = image_detections[image_detections[:, -1] == label, :-1]
all_inferences[i] = inference_time
return all_detections, all_inferences
def _get_annotations(generator):
""" Get the ground truth annotations from the generator.
The result is a list of lists such that the size is:
all_annotations[num_images][num_classes] = annotations[num_annotations, 4]
# Arguments
generator : The generator used to retrieve ground truth annotations.
# Returns
A list of lists containing the annotations for each image in the generator.
"""
all_annotations = [[None for i in range(generator.num_classes())] for j in range(generator.size())]
for i in progressbar.progressbar(range(generator.size()), prefix='Parsing annotations: '):
# load the annotations
annotations = generator.load_annotations(i)
# copy detections to all_annotations
for label in range(generator.num_classes()):
if not generator.has_label(label):
continue
all_annotations[i][label] = annotations['bboxes'][annotations['labels'] == label, :].copy()
return all_annotations
def evaluate(
generator,
model,
iou_threshold=0.5,
score_threshold=0.05,
max_detections=100,
save_path=None
):
""" Evaluate a given dataset using a given model.
# Arguments
generator : The generator that represents the dataset to evaluate.
model : The model to evaluate.
iou_threshold   : The IoU threshold at or above which a detection is considered a true positive.
score_threshold : The score confidence threshold to use for detections.
max_detections : The maximum number of detections to use per image.
save_path : The path to save images with visualized detections to.
# Returns
A dict mapping class names to mAP scores.
"""
# gather all detections and annotations
all_detections, all_inferences = _get_detections(generator, model, score_threshold=score_threshold, max_detections=max_detections, save_path=save_path)
all_annotations = _get_annotations(generator)
average_precisions = {}
# process detections and annotations
for label in range(generator.num_classes()):
if not generator.has_label(label):
continue
false_positives = np.zeros((0,))
true_positives = np.zeros((0,))
scores = np.zeros((0,))
num_annotations = 0.0
for i in range(generator.size()):
detections = all_detections[i][label]
annotations = all_annotations[i][label]
num_annotations += annotations.shape[0]
detected_annotations = []
for d in detections:
scores = np.append(scores, d[4])
if annotations.shape[0] == 0:
false_positives = np.append(false_positives, 1)
true_positives = np.append(true_positives, 0)
continue
overlaps = compute_overlap(np.expand_dims(d, axis=0), annotations)
assigned_annotation = np.argmax(overlaps, axis=1)
max_overlap = overlaps[0, assigned_annotation]
if max_overlap >= iou_threshold and assigned_annotation not in detected_annotations:
false_positives = np.append(false_positives, 0)
true_positives = np.append(true_positives, 1)
detected_annotations.append(assigned_annotation)
else:
false_positives = np.append(false_positives, 1)
true_positives = np.append(true_positives, 0)
# no annotations for this class -> define AP as 0, since the class cannot be evaluated
if num_annotations == 0:
average_precisions[label] = 0, 0
continue
# sort by score
indices = np.argsort(-scores)
false_positives = false_positives[indices]
true_positives = true_positives[indices]
# compute false positives and true positives
false_positives = np.cumsum(false_positives)
true_positives = np.cumsum(true_positives)
# compute recall and precision
recall = true_positives / num_annotations
precision = true_positives / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps)
# compute average precision
average_precision = _compute_ap(recall, precision)
average_precisions[label] = average_precision, num_annotations
# average inference time per image
inference_time = np.sum(all_inferences) / generator.size()
return average_precisions, inference_time
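A hedged usage sketch of evaluate (illustrative; the snapshot and CSV file names are assumptions, and the model must be an inference model, e.g. a training snapshot converted with keras_retinanet.models.convert_model):

from keras_retinanet import models
from keras_retinanet.preprocessing.csv_generator import CSVGenerator

generator = CSVGenerator('annotations.csv', 'classes.csv')
model = models.convert_model(models.load_model('snapshot.h5', backbone_name='resnet50'))

average_precisions, inference_time = evaluate(generator, model, iou_threshold=0.5)
for label, (average_precision, num_annotations) in average_precisions.items():
    print('{}: AP {:.4f} ({} annotations)'.format(
        generator.label_to_name(label), average_precision, num_annotations))
print('average inference time per image: {:.4f}s'.format(inference_time))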
"""
Copyright 2017-2019 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import tensorflow as tf
def setup_gpu(gpu_id):
try:
visible_gpu_indices = [int(id) for id in gpu_id.split(',')]
available_gpus = tf.config.list_physical_devices('GPU')
visible_gpus = [gpu for idx, gpu in enumerate(available_gpus) if idx in visible_gpu_indices]
if visible_gpus:
try:
# Currently, memory growth needs to be the same across GPUs.
for gpu in available_gpus:
tf.config.experimental.set_memory_growth(gpu, True)
# Use only the selected GPUs.
tf.config.set_visible_devices(visible_gpus, 'GPU')
except RuntimeError as e:
# Visible devices must be set before GPUs have been initialized.
print(e)
logical_gpus = tf.config.list_logical_devices('GPU')
print(len(available_gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
else:
tf.config.set_visible_devices([], 'GPU')
except ValueError:
tf.config.set_visible_devices([], 'GPU')
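Usage is a single call before any model is built (illustrative; the device indices are assumptions):

setup_gpu('0')      # use only the first GPU, with memory growth enabled
# setup_gpu('0,1')  # comma-separated indices select multiple GPUs
# setup_gpu('cpu')  # any non-integer id raises ValueError and hides all GPUs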
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
def freeze(model):
""" Set all layers in a model to non-trainable.
The weights for these layers will not be updated during training.
This function modifies the given model in-place,
but it also returns the modified model to allow easy chaining with other functions.
"""
for layer in model.layers:
layer.trainable = False
return model
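A small sketch of how freeze might be used (illustrative; the ResNet50 application model here is a stand-in, not the library's own backbone builder):

from tensorflow import keras

backbone = keras.applications.ResNet50(include_top=False, weights=None)
backbone = freeze(backbone)  # returns the same model, now fully non-trainable
assert not any(layer.trainable for layer in backbone.layers)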
"""
Copyright 2017-2019 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from __future__ import print_function
import tensorflow as tf
import sys
MINIMUM_TF_VERSION = 2, 3, 0
BLACKLISTED_TF_VERSIONS = []
def tf_version():
""" Get the Tensorflow version.
# Returns
tuple of (major, minor, patch).
"""
return tuple(map(int, tf.version.VERSION.split('-')[0].split('.')))
def tf_version_ok(minimum_tf_version=MINIMUM_TF_VERSION, blacklisted=BLACKLISTED_TF_VERSIONS):
""" Check if the current Tensorflow version is higher than the minimum version.
"""
return tf_version() >= minimum_tf_version and tf_version() not in blacklisted
def assert_tf_version(minimum_tf_version=MINIMUM_TF_VERSION, blacklisted=BLACKLISTED_TF_VERSIONS):
""" Assert that the Tensorflow version is up to date.
"""
detected = tf.version.VERSION
required = '.'.join(map(str, minimum_tf_version))
assert(tf_version_ok(minimum_tf_version, blacklisted)), 'You are using tensorflow version {}. The minimum required version is {} (blacklisted: {}).'.format(detected, required, blacklisted)
def check_tf_version():
""" Check that the Tensorflow version is up to date. If it isn't, print an error message and exit the script.
"""
try:
assert_tf_version()
except AssertionError as e:
print(e, file=sys.stderr)
sys.exit(1)
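Typical use is a guard at script start-up (illustrative sketch):

if __name__ == '__main__':
    check_tf_version()  # prints to stderr and exits if the version is too old
    print('Tensorflow version OK:', tf_version())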
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import cv2
import numpy as np
from .colors import label_color
def draw_box(image, box, color, thickness=2):
""" Draws a box on an image with a given color.
# Arguments
image : The image to draw on.
box : A list of 4 elements (x1, y1, x2, y2).
color : The color of the box.
thickness : The thickness of the lines to draw a box with.
"""
b = np.array(box).astype(int)
cv2.rectangle(image, (b[0], b[1]), (b[2], b[3]), color, thickness, cv2.LINE_AA)
def draw_caption(image, box, caption):
""" Draws a caption above the box in an image.
# Arguments
image : The image to draw on.
box : A list of 4 elements (x1, y1, x2, y2).
caption : String containing the text to draw.
"""
b = np.array(box).astype(int)
cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)
def draw_boxes(image, boxes, color, thickness=2):
""" Draws boxes on an image with a given color.
# Arguments
image : The image to draw on.
boxes : A [N, 4] matrix (x1, y1, x2, y2).
color : The color of the boxes.
thickness : The thickness of the lines to draw boxes with.
"""
for b in boxes:
draw_box(image, b, color, thickness=thickness)
def draw_detections(image, boxes, scores, labels, color=None, label_to_name=None, score_threshold=0.5):
""" Draws detections in an image.
# Arguments
image : The image to draw on.
boxes : A [N, 4] matrix (x1, y1, x2, y2).
scores : A list of N classification scores.
labels : A list of N labels.
color : The color of the boxes. By default the color from keras_retinanet.utils.colors.label_color will be used.
label_to_name : (optional) Functor for mapping a label to a name.
score_threshold : Threshold used for determining what detections to draw.
"""
selection = np.where(scores > score_threshold)[0]
for i in selection:
c = color if color is not None else label_color(labels[i])
draw_box(image, boxes[i, :], color=c)
# draw labels
caption = '{}: {:.2f}'.format(label_to_name(labels[i]) if label_to_name else labels[i], scores[i])
draw_caption(image, boxes[i, :], caption)
def draw_annotations(image, annotations, color=(0, 255, 0), label_to_name=None):
""" Draws annotations in an image.
# Arguments
image : The image to draw on.
annotations : A [N, 5] matrix (x1, y1, x2, y2, label) or dictionary containing bboxes (shaped [N, 4]) and labels (shaped [N]).
color : The color of the boxes. Defaults to green; pass None to use keras_retinanet.utils.colors.label_color per label.
label_to_name : (optional) Functor for mapping a label to a name.
"""
if isinstance(annotations, np.ndarray):
annotations = {'bboxes': annotations[:, :4], 'labels': annotations[:, 4]}
assert('bboxes' in annotations)
assert('labels' in annotations)
assert(annotations['bboxes'].shape[0] == annotations['labels'].shape[0])
for i in range(annotations['bboxes'].shape[0]):
label = annotations['labels'][i]
c = color if color is not None else label_color(label)
caption = '{}'.format(label_to_name(label) if label_to_name else label)
draw_caption(image, annotations['bboxes'][i], caption)
draw_box(image, annotations['bboxes'][i], color=c)
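An illustrative sketch of the drawing helpers on a synthetic image (the boxes, scores, and class names below are made up):

image = np.zeros((200, 200, 3), dtype=np.uint8)
boxes = np.array([[10, 10, 80, 80], [50, 120, 150, 190]])
scores = np.array([0.9, 0.4])
labels = np.array([0, 1])
names = {0: 'person', 1: 'car'}

# only the 0.9 detection clears the default 0.5 score threshold
draw_detections(image, boxes, scores, labels, label_to_name=lambda l: names[l])
cv2.imwrite('detections.png', image)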
cython
keras-resnet==0.2.0
git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI
h5py
keras
matplotlib
numpy>=1.14
opencv-python>=3.3.0
pillow
progressbar2
tensorflow>=2.3.0
# ignore:
# E201 whitespace after '['
# E202 whitespace before ']'
# E203 whitespace before ':'
# E221 multiple spaces before operator
# E241 multiple spaces after ','
# E251 unexpected spaces around keyword / parameter equals
# E402 module level import not at top of file
# E501 line too long (85 > 79 characters)
# W504 line break after binary operator
[tool:pytest]
flake8-max-line-length = 100
flake8-ignore = E201 E202 E203 E221 E241 E251 E402 E501 W504
import setuptools
from setuptools.extension import Extension
from distutils.command.build_ext import build_ext as DistUtilsBuildExt
class BuildExtension(setuptools.Command):
description = DistUtilsBuildExt.description
user_options = DistUtilsBuildExt.user_options
boolean_options = DistUtilsBuildExt.boolean_options
help_options = DistUtilsBuildExt.help_options
def __init__(self, *args, **kwargs):
from setuptools.command.build_ext import build_ext as SetupToolsBuildExt
# Bypass __setattr__ to avoid infinite recursion.
self.__dict__['_command'] = SetupToolsBuildExt(*args, **kwargs)
def __getattr__(self, name):
return getattr(self._command, name)
def __setattr__(self, name, value):
setattr(self._command, name, value)
def initialize_options(self, *args, **kwargs):
return self._command.initialize_options(*args, **kwargs)
def finalize_options(self, *args, **kwargs):
ret = self._command.finalize_options(*args, **kwargs)
import numpy
self.include_dirs.append(numpy.get_include())
return ret
def run(self, *args, **kwargs):
return self._command.run(*args, **kwargs)
extensions = [
Extension(
'keras_retinanet.utils.compute_overlap',
['keras_retinanet/utils/compute_overlap.pyx']
),
]
setuptools.setup(
name = 'keras-retinanet',
version = '1.0.0',
description = 'Keras implementation of RetinaNet object detection.',
url = 'https://github.com/fizyr/keras-retinanet',
author = 'Hans Gaiser',
author_email = 'h.gaiser@fizyr.com',
maintainer = 'Hans Gaiser',
maintainer_email = 'h.gaiser@fizyr.com',
cmdclass = {'build_ext': BuildExtension},
packages = setuptools.find_packages(),
install_requires = ['keras-resnet==0.2.0', 'six', 'numpy', 'cython', 'Pillow', 'opencv-python', 'progressbar2'],
entry_points = {
'console_scripts': [
'retinanet-train=keras_retinanet.bin.train:main',
'retinanet-evaluate=keras_retinanet.bin.evaluate:main',
'retinanet-debug=keras_retinanet.bin.debug:main',
'retinanet-convert-model=keras_retinanet.bin.convert_model:main',
],
},
ext_modules = extensions,
setup_requires = ["cython>=0.28", "numpy>=1.14.0"]
)
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import numpy as np
from tensorflow import keras
import keras_retinanet.backend
def test_bbox_transform_inv():
boxes = np.array([[
[100, 100, 200, 200],
[100, 100, 300, 300],
[100, 100, 200, 300],
[100, 100, 300, 200],
[80, 120, 200, 200],
[80, 120, 300, 300],
[80, 120, 200, 300],
[80, 120, 300, 200],
]])
boxes = keras.backend.variable(boxes)
deltas = np.array([[
[0 , 0 , 0 , 0 ],
[0 , 0.1, 0 , 0 ],
[-0.3, 0 , 0 , 0 ],
[0.2 , 0.2, 0 , 0 ],
[0 , 0 , 0.1 , 0 ],
[0 , 0 , 0 , -0.3],
[0 , 0 , 0.2 , 0.2 ],
[0.1 , 0.2, -0.3, 0.4 ],
]])
deltas = keras.backend.variable(deltas)
expected = np.array([[
[100 , 100 , 200 , 200 ],
[100 , 104 , 300 , 300 ],
[ 94 , 100 , 200 , 300 ],
[108 , 104 , 300 , 200 ],
[ 80 , 120 , 202.4 , 200 ],
[ 80 , 120 , 300 , 289.2],
[ 80 , 120 , 204.8 , 307.2],
[ 84.4, 123.2, 286.8 , 206.4]
]])
result = keras_retinanet.backend.bbox_transform_inv(boxes, deltas)
result = keras.backend.eval(result)
np.testing.assert_array_almost_equal(result, expected, decimal=2)
def test_shift():
shape = (2, 3)
stride = 8
anchors = np.array([
[-8, -8, 8, 8],
[-16, -16, 16, 16],
[-12, -12, 12, 12],
[-12, -16, 12, 16],
[-16, -12, 16, 12]
], dtype=keras.backend.floatx())
expected = [
# anchors for (0, 0)
[4 - 8, 4 - 8, 4 + 8, 4 + 8],
[4 - 16, 4 - 16, 4 + 16, 4 + 16],
[4 - 12, 4 - 12, 4 + 12, 4 + 12],
[4 - 12, 4 - 16, 4 + 12, 4 + 16],
[4 - 16, 4 - 12, 4 + 16, 4 + 12],
# anchors for (0, 1)
[12 - 8, 4 - 8, 12 + 8, 4 + 8],
[12 - 16, 4 - 16, 12 + 16, 4 + 16],
[12 - 12, 4 - 12, 12 + 12, 4 + 12],
[12 - 12, 4 - 16, 12 + 12, 4 + 16],
[12 - 16, 4 - 12, 12 + 16, 4 + 12],
# anchors for (0, 2)
[20 - 8, 4 - 8, 20 + 8, 4 + 8],
[20 - 16, 4 - 16, 20 + 16, 4 + 16],
[20 - 12, 4 - 12, 20 + 12, 4 + 12],
[20 - 12, 4 - 16, 20 + 12, 4 + 16],
[20 - 16, 4 - 12, 20 + 16, 4 + 12],
# anchors for (1, 0)
[4 - 8, 12 - 8, 4 + 8, 12 + 8],
[4 - 16, 12 - 16, 4 + 16, 12 + 16],
[4 - 12, 12 - 12, 4 + 12, 12 + 12],
[4 - 12, 12 - 16, 4 + 12, 12 + 16],
[4 - 16, 12 - 12, 4 + 16, 12 + 12],
# anchors for (1, 1)
[12 - 8, 12 - 8, 12 + 8, 12 + 8],
[12 - 16, 12 - 16, 12 + 16, 12 + 16],
[12 - 12, 12 - 12, 12 + 12, 12 + 12],
[12 - 12, 12 - 16, 12 + 12, 12 + 16],
[12 - 16, 12 - 12, 12 + 16, 12 + 12],
# anchors for (1, 2)
[20 - 8, 12 - 8, 20 + 8, 12 + 8],
[20 - 16, 12 - 16, 20 + 16, 12 + 16],
[20 - 12, 12 - 12, 20 + 12, 12 + 12],
[20 - 12, 12 - 16, 20 + 12, 12 + 16],
[20 - 16, 12 - 12, 20 + 16, 12 + 12],
]
result = keras_retinanet.backend.shift(shape, stride, anchors)
result = keras.backend.eval(result)
np.testing.assert_array_equal(result, expected)
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import keras_retinanet.backend
import keras_retinanet.bin.train
from tensorflow import keras
import warnings
import pytest
@pytest.fixture(autouse=True)
def clear_session():
# run before test (do nothing)
yield
# run after test, clear keras session
keras.backend.clear_session()
def test_coco():
# ignore warnings in this test
warnings.simplefilter('ignore')
# run training / evaluation
keras_retinanet.bin.train.main([
'--epochs=1',
'--steps=1',
'--no-weights',
'--no-snapshots',
'coco',
'tests/test-data/coco',
])
def test_pascal():
# ignore warnings in this test
warnings.simplefilter('ignore')
# run training / evaluation
keras_retinanet.bin.train.main([
'--epochs=1',
'--steps=1',
'--no-weights',
'--no-snapshots',
'pascal',
'tests/test-data/pascal',
])
def test_csv():
# ignore warnings in this test
warnings.simplefilter('ignore')
# run training / evaluation
keras_retinanet.bin.train.main([
'--epochs=1',
'--steps=1',
'--no-weights',
'--no-snapshots',
'csv',
'tests/test-data/csv/annotations.csv',
'tests/test-data/csv/classes.csv',
])
def test_vgg():
# ignore warnings in this test
warnings.simplefilter('ignore')
# run training / evaluation
keras_retinanet.bin.train.main([
'--backbone=vgg16',
'--epochs=1',
'--steps=1',
'--no-weights',
'--no-snapshots',
'--freeze-backbone',
'coco',
'tests/test-data/coco',
])
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from tensorflow import keras
import keras_retinanet.backend
import keras_retinanet.layers
import numpy as np
class TestFilterDetections(object):
def test_simple(self):
# create simple FilterDetections layer
filter_detections_layer = keras_retinanet.layers.FilterDetections()
# create simple input
boxes = np.array([[
[0, 0, 10, 10],
[0, 0, 10, 10], # this will be suppressed
]], dtype=keras.backend.floatx())
boxes = keras.backend.constant(boxes)
classification = np.array([[
[0, 0.9], # this will be suppressed
[0, 1],
]], dtype=keras.backend.floatx())
classification = keras.backend.constant(classification)
# compute output
actual_boxes, actual_scores, actual_labels = filter_detections_layer.call([boxes, classification])
actual_boxes = keras.backend.eval(actual_boxes)
actual_scores = keras.backend.eval(actual_scores)
actual_labels = keras.backend.eval(actual_labels)
# define expected output
expected_boxes = -1 * np.ones((1, 300, 4), dtype=keras.backend.floatx())
expected_boxes[0, 0, :] = [0, 0, 10, 10]
expected_scores = -1 * np.ones((1, 300), dtype=keras.backend.floatx())
expected_scores[0, 0] = 1
expected_labels = -1 * np.ones((1, 300), dtype=keras.backend.floatx())
expected_labels[0, 0] = 1
# assert actual and expected are equal
np.testing.assert_array_equal(actual_boxes, expected_boxes)
np.testing.assert_array_equal(actual_scores, expected_scores)
np.testing.assert_array_equal(actual_labels, expected_labels)
def test_simple_with_other(self):
# create simple FilterDetections layer
filter_detections_layer = keras_retinanet.layers.FilterDetections()
# create simple input
boxes = np.array([[
[0, 0, 10, 10],
[0, 0, 10, 10], # this will be suppressed
]], dtype=keras.backend.floatx())
boxes = keras.backend.constant(boxes)
classification = np.array([[
[0, 0.9], # this will be suppressed
[0, 1],
]], dtype=keras.backend.floatx())
classification = keras.backend.constant(classification)
other = []
other.append(np.array([[
[0, 1234], # this will be suppressed
[0, 5678],
]], dtype=keras.backend.floatx()))
other.append(np.array([[
5678, # this will be suppressed
1234,
]], dtype=keras.backend.floatx()))
other = [keras.backend.constant(o) for o in other]
# compute output
actual = filter_detections_layer.call([boxes, classification] + other)
actual_boxes = keras.backend.eval(actual[0])
actual_scores = keras.backend.eval(actual[1])
actual_labels = keras.backend.eval(actual[2])
actual_other = [keras.backend.eval(a) for a in actual[3:]]
# define expected output
expected_boxes = -1 * np.ones((1, 300, 4), dtype=keras.backend.floatx())
expected_boxes[0, 0, :] = [0, 0, 10, 10]
expected_scores = -1 * np.ones((1, 300), dtype=keras.backend.floatx())
expected_scores[0, 0] = 1
expected_labels = -1 * np.ones((1, 300), dtype=keras.backend.floatx())
expected_labels[0, 0] = 1
expected_other = []
expected_other.append(-1 * np.ones((1, 300, 2), dtype=keras.backend.floatx()))
expected_other[-1][0, 0, :] = [0, 5678]
expected_other.append(-1 * np.ones((1, 300), dtype=keras.backend.floatx()))
expected_other[-1][0, 0] = 1234
# assert actual and expected are equal
np.testing.assert_array_equal(actual_boxes, expected_boxes)
np.testing.assert_array_equal(actual_scores, expected_scores)
np.testing.assert_array_equal(actual_labels, expected_labels)
for a, e in zip(actual_other, expected_other):
np.testing.assert_array_equal(a, e)
def test_mini_batch(self):
# create simple FilterDetections layer
filter_detections_layer = keras_retinanet.layers.FilterDetections()
# create input with batch_size=2
boxes = np.array([
[
[0, 0, 10, 10], # this will be suppressed
[0, 0, 10, 10],
],
[
[100, 100, 150, 150],
[100, 100, 150, 150], # this will be suppressed
],
], dtype=keras.backend.floatx())
boxes = keras.backend.constant(boxes)
classification = np.array([
[
[0, 0.9], # this will be suppressed
[0, 1],
],
[
[1, 0],
[0.9, 0], # this will be suppressed
],
], dtype=keras.backend.floatx())
classification = keras.backend.constant(classification)
# compute output
actual_boxes, actual_scores, actual_labels = filter_detections_layer.call([boxes, classification])
actual_boxes = keras.backend.eval(actual_boxes)
actual_scores = keras.backend.eval(actual_scores)
actual_labels = keras.backend.eval(actual_labels)
# define expected output
expected_boxes = -1 * np.ones((2, 300, 4), dtype=keras.backend.floatx())
expected_boxes[0, 0, :] = [0, 0, 10, 10]
expected_boxes[1, 0, :] = [100, 100, 150, 150]
expected_scores = -1 * np.ones((2, 300), dtype=keras.backend.floatx())
expected_scores[0, 0] = 1
expected_scores[1, 0] = 1
expected_labels = -1 * np.ones((2, 300), dtype=keras.backend.floatx())
expected_labels[0, 0] = 1
expected_labels[1, 0] = 0
# assert actual and expected are equal
np.testing.assert_array_equal(actual_boxes, expected_boxes)
np.testing.assert_array_equal(actual_scores, expected_scores)
np.testing.assert_array_equal(actual_labels, expected_labels)
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from tensorflow import keras
import keras_retinanet.backend
import keras_retinanet.layers
import numpy as np
class TestAnchors(object):
def test_simple(self):
# create simple Anchors layer
anchors_layer = keras_retinanet.layers.Anchors(
size=32,
stride=8,
ratios=np.array([1], keras.backend.floatx()),
scales=np.array([1], keras.backend.floatx()),
)
# create fake features input (only shape is used anyway)
features = np.zeros((1, 2, 2, 1024), dtype=keras.backend.floatx())
features = keras.backend.variable(features)
# call the Anchors layer
anchors = anchors_layer.call(features)
anchors = keras.backend.eval(anchors)
# expected anchor values
expected = np.array([[
[-12, -12, 20, 20],
[-4 , -12, 28, 20],
[-12, -4 , 20, 28],
[-4 , -4 , 28, 28],
]], dtype=keras.backend.floatx())
# test anchor values
np.testing.assert_array_equal(anchors, expected)
def test_mini_batch(self):
# create simple Anchors layer
anchors_layer = keras_retinanet.layers.Anchors(
size=32,
stride=8,
ratios=np.array([1], dtype=keras.backend.floatx()),
scales=np.array([1], dtype=keras.backend.floatx()),
)
# create fake features input with batch_size=2
features = np.zeros((2, 2, 2, 1024), dtype=keras.backend.floatx())
features = keras.backend.variable(features)
# call the Anchors layer
anchors = anchors_layer.call(features)
anchors = keras.backend.eval(anchors)
# expected anchor values
expected = np.array([[
[-12, -12, 20, 20],
[-4 , -12, 28, 20],
[-12, -4 , 20, 28],
[-4 , -4 , 28, 28],
]], dtype=keras.backend.floatx())
expected = np.tile(expected, (2, 1, 1))
# test anchor values
np.testing.assert_array_equal(anchors, expected)
class TestUpsampleLike(object):
def test_simple(self):
# create simple UpsampleLike layer
upsample_like_layer = keras_retinanet.layers.UpsampleLike()
# create input source
source = np.zeros((1, 2, 2, 1), dtype=keras.backend.floatx())
source = keras.backend.variable(source)
target = np.zeros((1, 5, 5, 1), dtype=keras.backend.floatx())
expected = target
target = keras.backend.variable(target)
# compute output
actual = upsample_like_layer.call([source, target])
actual = keras.backend.eval(actual)
np.testing.assert_array_equal(actual, expected)
def test_mini_batch(self):
# create simple UpsampleLike layer
upsample_like_layer = keras_retinanet.layers.UpsampleLike()
# create input source
source = np.zeros((2, 2, 2, 1), dtype=keras.backend.floatx())
source = keras.backend.variable(source)
target = np.zeros((2, 5, 5, 1), dtype=keras.backend.floatx())
expected = target
target = keras.backend.variable(target)
# compute output
actual = upsample_like_layer.call([source, target])
actual = keras.backend.eval(actual)
np.testing.assert_array_equal(actual, expected)
class TestRegressBoxes(object):
def test_simple(self):
mean = [0, 0, 0, 0]
std = [0.2, 0.2, 0.2, 0.2]
# create simple RegressBoxes layer
regress_boxes_layer = keras_retinanet.layers.RegressBoxes(mean=mean, std=std)
# create input
anchors = np.array([[
[0 , 0 , 10 , 10 ],
[50, 50, 100, 100],
[20, 20, 40 , 40 ],
]], dtype=keras.backend.floatx())
anchors = keras.backend.variable(anchors)
regression = np.array([[
[0 , 0 , 0 , 0 ],
[0.1, 0.1, 0 , 0 ],
[0 , 0 , 0.1, 0.1],
]], dtype=keras.backend.floatx())
regression = keras.backend.variable(regression)
# compute output
actual = regress_boxes_layer.call([anchors, regression])
actual = keras.backend.eval(actual)
# compute expected output
expected = np.array([[
[0 , 0 , 10 , 10 ],
[51, 51, 100 , 100 ],
[20, 20, 40.4, 40.4],
]], dtype=keras.backend.floatx())
np.testing.assert_array_almost_equal(actual, expected, decimal=2)
def test_mini_batch(self):
mean = [0, 0, 0, 0]
std = [0.2, 0.2, 0.2, 0.2]
# create simple RegressBoxes layer
regress_boxes_layer = keras_retinanet.layers.RegressBoxes(mean=mean, std=std)
# create input
anchors = np.array([
[
[0 , 0 , 10 , 10 ], # 1
[50, 50, 100, 100], # 2
[20, 20, 40 , 40 ], # 3
],
[
[20, 20, 40 , 40 ], # 3
[0 , 0 , 10 , 10 ], # 1
[50, 50, 100, 100], # 2
],
], dtype=keras.backend.floatx())
anchors = keras.backend.variable(anchors)
regression = np.array([
[
[0 , 0 , 0 , 0 ], # 1
[0.1, 0.1, 0 , 0 ], # 2
[0 , 0 , 0.1, 0.1], # 3
],
[
[0 , 0 , 0.1, 0.1], # 3
[0 , 0 , 0 , 0 ], # 1
[0.1, 0.1, 0 , 0 ], # 2
],
], dtype=keras.backend.floatx())
regression = keras.backend.variable(regression)
# compute output
actual = regress_boxes_layer.call([anchors, regression])
actual = keras.backend.eval(actual)
# compute expected output
expected = np.array([
[
[0 , 0 , 10 , 10 ], # 1
[51, 51, 100 , 100 ], # 2
[20, 20, 40.4, 40.4], # 3
],
[
[20, 20, 40.4, 40.4], # 3
[0 , 0 , 10 , 10 ], # 1
[51, 51, 100 , 100 ], # 2
],
], dtype=keras.backend.floatx())
np.testing.assert_array_almost_equal(actual, expected, decimal=2)
"""
Copyright 2018 vidosits (https://github.com/vidosits/)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import warnings
import pytest
import numpy as np
from tensorflow import keras
from keras_retinanet import losses
from keras_retinanet.models.densenet import DenseNetBackbone
parameters = ['densenet121']
@pytest.mark.parametrize("backbone", parameters)
def test_backbone(backbone):
# ignore warnings in this test
warnings.simplefilter('ignore')
num_classes = 10
inputs = np.zeros((1, 200, 400, 3), dtype=np.float32)
targets = [np.zeros((1, 14814, 5), dtype=np.float32), np.zeros((1, 14814, num_classes + 1))]
inp = keras.layers.Input(inputs[0].shape)
densenet_backbone = DenseNetBackbone(backbone)
model = densenet_backbone.retinanet(num_classes=num_classes, inputs=inp)
model.summary()
# compile model
model.compile(
loss={
'regression': losses.smooth_l1(),
'classification': losses.focal()
},
optimizer=keras.optimizers.Adam(learning_rate=1e-5, clipnorm=0.001))
model.fit(inputs, targets, batch_size=1)