tensorflow-inception file upload

최예리
Commit dfc01a8144797fe89a5ddf443d19e8d2738c0164 dfc01a81 1 parent 62452b41
Showing 2 changed files with 1405 additions and 0 deletions
tensorflow/retrain.py
tensorflow/retrain_run_inference.py
--- a/tensorflow/retrain.py 0 → 100644
View file @dfc01a8
+++ b/tensorflow/retrain.py 0 → 100644
View file @dfc01a8
+ # Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+ # NOTICE: This work was derived from tensorflow/examples/image_retraining
+ # and modified to use TensorFlow Hub modules.
+ 
+ # pylint: disable=line-too-long
+ r"""Simple transfer learning with image modules from TensorFlow Hub.
+ 
+ This example shows how to train an image classifier based on any
+ TensorFlow Hub module that computes image feature vectors. By default,
+ it uses the feature vectors computed by Inception V3 trained on ImageNet.
+ For more options, search https://tfhub.dev for image feature vector modules.
+ 
+ The top layer receives as input a 2048-dimensional vector (assuming
+ Inception V3) for each image. We train a softmax layer on top of this
+ representation. If the softmax layer contains N labels, this corresponds
+ to learning N + 2048*N model parameters for the biases and weights.
+ 
+ Here's an example, which assumes you have a folder containing class-named
+ subfolders, each full of images for each label. The example folder flower_photos
+ should have a structure like this:
+ 
+ ~/flower_photos/daisy/photo1.jpg
+ ~/flower_photos/daisy/photo2.jpg
+ ...
+ ~/flower_photos/rose/anotherphoto77.jpg
+ ...
+ ~/flower_photos/sunflower/somepicture.jpg
+ 
+ The subfolder names are important, since they define what label is applied to
+ each image, but the filenames themselves don't matter. (For a working example,
+ download http://download.tensorflow.org/example_images/flower_photos.tgz
+ and run  tar xzf flower_photos.tgz  to unpack it.)
+ 
+ Once your images are prepared, and you have pip-installed tensorflow-hub and
+ a sufficiently recent version of tensorflow, you can run the training with a
+ command like this:
+ 
+ ```bash
+ python retrain.py --image_dir ~/flower_photos
+ ```
+ 
+ You can replace the image_dir argument with any folder containing subfolders of
+ images. The label for each image is taken from the name of the subfolder it's
+ in.
+ 
+ This produces a new model file that can be loaded and run by any TensorFlow
+ program, for example the tensorflow/examples/label_image sample code.
+ 
+ By default this script will use the highly accurate, but comparatively large and
+ slow Inception V3 model architecture. It's recommended that you start with this
+ to validate that you have gathered good training data, but if you want to deploy
+ on resource-limited platforms, you can try the `--tfhub_module` flag with a
+ Mobilenet model. For more information on Mobilenet, see
+ https://research.googleblog.com/2017/06/mobilenets-open-source-models-for.html
+ 
+ For example:
+ 
+ Run floating-point version of Mobilenet:
+ 
+ ```bash
+ python retrain.py --image_dir ~/flower_photos \
+     --tfhub_module https://tfhub.dev/google/imagenet/mobilenet_v1_100_224/feature_vector/3
+ ```
+ 
+ Run Mobilenet, instrumented for quantization:
+ 
+ ```bash
+ python retrain.py --image_dir ~/flower_photos/ \
+     --tfhub_module https://tfhub.dev/google/imagenet/mobilenet_v1_100_224/quantops/feature_vector/3
+ ```
+ 
+ These instrumented models can be converted to fully quantized mobile models via
+ TensorFlow Lite.
+ 
+ There are different Mobilenet models to choose from, with a variety of file
+ size and latency options.
+   - The first number can be '100', '075', '050', or '025' to control the number
+     of neurons (activations of hidden layers); the number of weights (and hence
+     to some extent the file size and speed) shrinks with the square of that
+     fraction.
+   - The second number is the input image size. You can choose '224', '192',
+     '160', or '128', with smaller sizes giving faster speeds.
+ 
+ To use with TensorBoard:
+ 
+ By default, this script will log summaries to the /tmp/retrain_logs directory.
+ 
+ Visualize the summaries with this command:
+ 
+ tensorboard --logdir /tmp/retrain_logs
+ 
+ To use with Tensorflow Serving, run this tool with --saved_model_dir set
+ to some increasingly numbered export location under the model base path, e.g.:
+ 
+ ```bash
+ python retrain.py (... other args as before ...) \
+     --saved_model_dir=/tmp/saved_models/$(date +%s)/
+ tensorflow_model_server --port=9000 --model_name=my_image_classifier \
+     --model_base_path=/tmp/saved_models/
+ ```
+ """
+ # pylint: enable=line-too-long
+ 
+ from __future__ import absolute_import
+ from __future__ import division
+ from __future__ import print_function
+ 
+ from absl import logging
+ 
+ import argparse
+ import collections
+ from datetime import datetime
+ import hashlib
+ import os.path
+ import random
+ import re
+ import sys
+ 
+ import numpy as np
+ import tensorflow as tf
+ import tensorflow_hub as hub
+ from tensorflow.contrib import quantize as contrib_quantize
+ 
+ FLAGS = None
+ 
+ MAX_NUM_IMAGES_PER_CLASS = 2 ** 27 - 1  # ~134M
+ 
+ # A module is understood as instrumented for quantization with TF-Lite
+ # if it contains any of these ops.
+ FAKE_QUANT_OPS = ('FakeQuantWithMinMaxVars',
+                   'FakeQuantWithMinMaxVarsPerChannel')
+ 
+ 
+ def create_image_lists(image_dir, testing_percentage, validation_percentage):
+   """Builds a list of training images from the file system.
+ 
+   Analyzes the sub folders in the image directory, splits them into stable
+   training, testing, and validation sets, and returns a data structure
+   describing the lists of images for each label and their paths.
+ 
+   Args:
+     image_dir: String path to a folder containing subfolders of images.
+     testing_percentage: Integer percentage of the images to reserve for tests.
+     validation_percentage: Integer percentage of images reserved for validation.
+ 
+   Returns:
+     An OrderedDict containing an entry for each label subfolder, with images
+     split into training, testing, and validation sets within each label.
+     The order of items defines the class indices.
+   """
+   if not tf.gfile.Exists(image_dir):
+     logging.error("Image directory '" + image_dir + "' not found.")
+     return None
+   result = collections.OrderedDict()
+   sub_dirs = sorted(x[0] for x in tf.gfile.Walk(image_dir))
+   # The root directory comes first, so skip it.
+   is_root_dir = True
+   for sub_dir in sub_dirs:
+     if is_root_dir:
+       is_root_dir = False
+       continue
+     extensions = sorted(set(os.path.normcase(ext)  # Smash case on Windows.
+                             for ext in ['JPEG', 'JPG', 'jpeg', 'jpg', 'png']))
+     file_list = []
+     dir_name = os.path.basename(
+         # tf.gfile.Walk() returns sub-directory with trailing '/' when it is in
+         # Google Cloud Storage, which confuses os.path.basename().
+         sub_dir[:-1] if sub_dir.endswith('/') else sub_dir)
+ 
+     if dir_name == image_dir:
+       continue
+     logging.info("Looking for images in '%s'",  dir_name)
+     for extension in extensions:
+       file_glob = os.path.join(image_dir, dir_name, '*.' + extension)
+       file_list.extend(tf.gfile.Glob(file_glob))
+     if not file_list:
+       logging.warning('No files found')
+       continue
+     if len(file_list) < 20:
+       logging.warning(
+           'WARNING: Folder has less than 20 images, which may cause issues.')
+     elif len(file_list) > MAX_NUM_IMAGES_PER_CLASS:
+       logging.warning(
+           'WARNING: Folder %s has more than %s images. Some images will '
+           'never be selected.', dir_name, MAX_NUM_IMAGES_PER_CLASS)
+     label_name = re.sub(r'[^a-z0-9]+', ' ', dir_name.lower())
+     training_images = []
+     testing_images = []
+     validation_images = []
+     for file_name in file_list:
+       base_name = os.path.basename(file_name)
+       # We want to ignore anything after '_nohash_' in the file name when
+       # deciding which set to put an image in, the data set creator has a way of
+       # grouping photos that are close variations of each other. For example
+       # this is used in the plant disease data set to group multiple pictures of
+       # the same leaf.
+       hash_name = re.sub(r'_nohash_.*$', '', file_name)
+       # This looks a bit magical, but we need to decide whether this file should
+       # go into the training, testing, or validation sets, and we want to keep
+       # existing files in the same set even if more files are subsequently
+       # added.
+       # To do that, we need a stable way of deciding based on just the file name
+       # itself, so we do a hash of that and then use that to generate a
+       # probability value that we use to assign it.
+       hash_name_hashed = hashlib.sha1(tf.compat.as_bytes(hash_name)).hexdigest()
+       percentage_hash = ((int(hash_name_hashed, 16) %
+                           (MAX_NUM_IMAGES_PER_CLASS + 1)) *
+                          (100.0 / MAX_NUM_IMAGES_PER_CLASS))
+       if percentage_hash < validation_percentage:
+         validation_images.append(base_name)
+       elif percentage_hash < (testing_percentage + validation_percentage):
+         testing_images.append(base_name)
+       else:
+         training_images.append(base_name)
+     result[label_name] = {
+         'dir': dir_name,
+         'training': training_images,
+         'testing': testing_images,
+         'validation': validation_images,
+     }
+   return result
+ 
+ 
+ def get_image_path(image_lists, label_name, index, image_dir, category):
+   """Returns a path to an image for a label at the given index.
+ 
+   Args:
+     image_lists: OrderedDict of training images for each label.
+     label_name: Label string we want to get an image for.
+     index: Int offset of the image we want. This will be moduloed by the
+     available number of images for the label, so it can be arbitrarily large.
+     image_dir: Root folder string of the subfolders containing the training
+     images.
+     category: Name string of set to pull images from - training, testing, or
+     validation.
+ 
+   Returns:
+     File system path string to an image that meets the requested parameters.
+ 
+   """
+   if label_name not in image_lists:
+     logging.fatal('Label does not exist %s.', label_name)
+   label_lists = image_lists[label_name]
+   if category not in label_lists:
+     logging.fatal('Category does not exist %s.', category)
+   category_list = label_lists[category]
+   if not category_list:
+     logging.fatal('Label %s has no images in the category %s.',
+                   label_name, category)
+   mod_index = index % len(category_list)
+   base_name = category_list[mod_index]
+   sub_dir = label_lists['dir']
+   full_path = os.path.join(image_dir, sub_dir, base_name)
+   return full_path
+ 
+ 
+ def get_bottleneck_path(image_lists, label_name, index, bottleneck_dir,
+                         category, module_name):
+   """Returns a path to a bottleneck file for a label at the given index.
+ 
+   Args:
+     image_lists: OrderedDict of training images for each label.
+     label_name: Label string we want to get an image for.
+     index: Integer offset of the image we want. This will be moduloed by the
+     available number of images for the label, so it can be arbitrarily large.
+     bottleneck_dir: Folder string holding cached files of bottleneck values.
+     category: Name string of set to pull images from - training, testing, or
+     validation.
+     module_name: The name of the image module being used.
+ 
+   Returns:
+     File system path string to an image that meets the requested parameters.
+   """
+   module_name = (module_name.replace('://', '~')  # URL scheme.
+                  .replace('/', '~')  # URL and Unix paths.
+                  .replace(':', '~').replace('\\', '~'))  # Windows paths.
+   return get_image_path(image_lists, label_name, index, bottleneck_dir,
+                         category) + '_' + module_name + '.txt'
+ 
+ 
+ def create_module_graph(module_spec):
+   """Creates a graph and loads Hub Module into it.
+ 
+   Args:
+     module_spec: the hub.ModuleSpec for the image module being used.
+ 
+   Returns:
+     graph: the tf.Graph that was created.
+     bottleneck_tensor: the bottleneck values output by the module.
+     resized_input_tensor: the input images, resized as expected by the module.
+     wants_quantization: a boolean, whether the module has been instrumented
+       with fake quantization ops.
+   """
+   height, width = hub.get_expected_image_size(module_spec)
+   with tf.Graph().as_default() as graph:
+     resized_input_tensor = tf.placeholder(tf.float32, [None, height, width, 3])
+     m = hub.Module(module_spec)
+     bottleneck_tensor = m(resized_input_tensor)
+     wants_quantization = any(node.op in FAKE_QUANT_OPS
+                              for node in graph.as_graph_def().node)
+   return graph, bottleneck_tensor, resized_input_tensor, wants_quantization
+ 
+ 
+ def run_bottleneck_on_image(sess, image_data, image_data_tensor,
+                             decoded_image_tensor, resized_input_tensor,
+                             bottleneck_tensor):
+   """Runs inference on an image to extract the 'bottleneck' summary layer.
+ 
+   Args:
+     sess: Current active TensorFlow Session.
+     image_data: String of raw JPEG data.
+     image_data_tensor: Input data layer in the graph.
+     decoded_image_tensor: Output of initial image resizing and preprocessing.
+     resized_input_tensor: The input node of the recognition graph.
+     bottleneck_tensor: Layer before the final softmax.
+ 
+   Returns:
+     Numpy array of bottleneck values.
+   """
+   # First decode the JPEG image, resize it, and rescale the pixel values.
+   resized_input_values = sess.run(decoded_image_tensor,
+                                   {image_data_tensor: image_data})
+   # Then run it through the recognition network.
+   bottleneck_values = sess.run(bottleneck_tensor,
+                                {resized_input_tensor: resized_input_values})
+   bottleneck_values = np.squeeze(bottleneck_values)
+   return bottleneck_values
+ 
+ 
+ def ensure_dir_exists(dir_name):
+   """Makes sure the folder exists on disk.
+ 
+   Args:
+     dir_name: Path string to the folder we want to create.
+   """
+   if not os.path.exists(dir_name):
+     os.makedirs(dir_name)
+ 
+ 
+ def create_bottleneck_file(bottleneck_path, image_lists, label_name, index,
+                            image_dir, category, sess, jpeg_data_tensor,
+                            decoded_image_tensor, resized_input_tensor,
+                            bottleneck_tensor):
+   """Create a single bottleneck file."""
+   logging.debug('Creating bottleneck at %s', bottleneck_path)
+   image_path = get_image_path(image_lists, label_name, index,
+                               image_dir, category)
+   if not tf.gfile.Exists(image_path):
+     logging.fatal('File does not exist %s', image_path)
+   image_data = tf.gfile.GFile(image_path, 'rb').read()
+   try:
+     bottleneck_values = run_bottleneck_on_image(
+         sess, image_data, jpeg_data_tensor, decoded_image_tensor,
+         resized_input_tensor, bottleneck_tensor)
+   except Exception as e:
+     raise RuntimeError('Error during processing file %s (%s)' % (image_path,
+                                                                  str(e)))
+   bottleneck_string = ','.join(str(x) for x in bottleneck_values)
+   with tf.gfile.GFile(bottleneck_path, 'w') as bottleneck_file:
+     bottleneck_file.write(bottleneck_string)
+ 
+ 
+ def get_or_create_bottleneck(sess, image_lists, label_name, index, image_dir,
+                              category, bottleneck_dir, jpeg_data_tensor,
+                              decoded_image_tensor, resized_input_tensor,
+                              bottleneck_tensor, module_name):
+   """Retrieves or calculates bottleneck values for an image.
+ 
+   If a cached version of the bottleneck data exists on-disk, return that,
+   otherwise calculate the data and save it to disk for future use.
+ 
+   Args:
+     sess: The current active TensorFlow Session.
+     image_lists: OrderedDict of training images for each label.
+     label_name: Label string we want to get an image for.
+     index: Integer offset of the image we want. This will be modulo-ed by the
+     available number of images for the label, so it can be arbitrarily large.
+     image_dir: Root folder string of the subfolders containing the training
+     images.
+     category: Name string of which set to pull images from - training, testing,
+     or validation.
+     bottleneck_dir: Folder string holding cached files of bottleneck values.
+     jpeg_data_tensor: The tensor to feed loaded jpeg data into.
+     decoded_image_tensor: The output of decoding and resizing the image.
+     resized_input_tensor: The input node of the recognition graph.
+     bottleneck_tensor: The output tensor for the bottleneck values.
+     module_name: The name of the image module being used.
+ 
+   Returns:
+     Numpy array of values produced by the bottleneck layer for the image.
+   """
+   label_lists = image_lists[label_name]
+   sub_dir = label_lists['dir']
+   sub_dir_path = os.path.join(bottleneck_dir, sub_dir)
+   ensure_dir_exists(sub_dir_path)
+   bottleneck_path = get_bottleneck_path(image_lists, label_name, index,
+                                         bottleneck_dir, category, module_name)
+   if not os.path.exists(bottleneck_path):
+     create_bottleneck_file(bottleneck_path, image_lists, label_name, index,
+                            image_dir, category, sess, jpeg_data_tensor,
+                            decoded_image_tensor, resized_input_tensor,
+                            bottleneck_tensor)
+   with tf.gfile.GFile(bottleneck_path, 'r') as bottleneck_file:
+     bottleneck_string = bottleneck_file.read()
+   did_hit_error = False
+   try:
+     bottleneck_values = [float(x) for x in bottleneck_string.split(',')]
+   except ValueError:
+     logging.warning('Invalid float found, recreating bottleneck')
+     did_hit_error = True
+   if did_hit_error:
+     create_bottleneck_file(bottleneck_path, image_lists, label_name, index,
+                            image_dir, category, sess, jpeg_data_tensor,
+                            decoded_image_tensor, resized_input_tensor,
+                            bottleneck_tensor)
+     with tf.gfile.GFile(bottleneck_path, 'r') as bottleneck_file:
+       bottleneck_string = bottleneck_file.read()
+     # Allow exceptions to propagate here, since they shouldn't happen after a
+     # fresh creation
+     bottleneck_values = [float(x) for x in bottleneck_string.split(',')]
+   return bottleneck_values
+ 
+ 
+ def cache_bottlenecks(sess, image_lists, image_dir, bottleneck_dir,
+                       jpeg_data_tensor, decoded_image_tensor,
+                       resized_input_tensor, bottleneck_tensor, module_name):
+   """Ensures all the training, testing, and validation bottlenecks are cached.
+ 
+   Because we're likely to read the same image multiple times (if there are no
+   distortions applied during training) it can speed things up a lot if we
+   calculate the bottleneck layer values once for each image during
+   preprocessing, and then just read those cached values repeatedly during
+   training. Here we go through all the images we've found, calculate those
+   values, and save them off.
+ 
+   Args:
+     sess: The current active TensorFlow Session.
+     image_lists: OrderedDict of training images for each label.
+     image_dir: Root folder string of the subfolders containing the training
+     images.
+     bottleneck_dir: Folder string holding cached files of bottleneck values.
+     jpeg_data_tensor: Input tensor for jpeg data from file.
+     decoded_image_tensor: The output of decoding and resizing the image.
+     resized_input_tensor: The input node of the recognition graph.
+     bottleneck_tensor: The penultimate output layer of the graph.
+     module_name: The name of the image module being used.
+ 
+   Returns:
+     Nothing.
+   """
+   how_many_bottlenecks = 0
+   ensure_dir_exists(bottleneck_dir)
+   for label_name, label_lists in image_lists.items():
+     for category in ['training', 'testing', 'validation']:
+       category_list = label_lists[category]
+       for index, unused_base_name in enumerate(category_list):
+         get_or_create_bottleneck(
+             sess, image_lists, label_name, index, image_dir, category,
+             bottleneck_dir, jpeg_data_tensor, decoded_image_tensor,
+             resized_input_tensor, bottleneck_tensor, module_name)
+ 
+         how_many_bottlenecks += 1
+         if how_many_bottlenecks % 100 == 0:
+           logging.info('%s bottleneck files created.', how_many_bottlenecks)
+ 
+ 
+ def get_random_cached_bottlenecks(sess, image_lists, how_many, category,
+                                   bottleneck_dir, image_dir, jpeg_data_tensor,
+                                   decoded_image_tensor, resized_input_tensor,
+                                   bottleneck_tensor, module_name):
+   """Retrieves bottleneck values for cached images.
+ 
+   If no distortions are being applied, this function can retrieve the cached
+   bottleneck values directly from disk for images. It picks a random set of
+   images from the specified category.
+ 
+   Args:
+     sess: Current TensorFlow Session.
+     image_lists: OrderedDict of training images for each label.
+     how_many: If positive, a random sample of this size will be chosen.
+     If negative, all bottlenecks will be retrieved.
+     category: Name string of which set to pull from - training, testing, or
+     validation.
+     bottleneck_dir: Folder string holding cached files of bottleneck values.
+     image_dir: Root folder string of the subfolders containing the training
+     images.
+     jpeg_data_tensor: The layer to feed jpeg image data into.
+     decoded_image_tensor: The output of decoding and resizing the image.
+     resized_input_tensor: The input node of the recognition graph.
+     bottleneck_tensor: The bottleneck output layer of the CNN graph.
+     module_name: The name of the image module being used.
+ 
+   Returns:
+     List of bottleneck arrays, their corresponding ground truths, and the
+     relevant filenames.
+   """
+   class_count = len(image_lists.keys())
+   bottlenecks = []
+   ground_truths = []
+   filenames = []
+   if how_many >= 0:
+     # Retrieve a random sample of bottlenecks.
+     for unused_i in range(how_many):
+       label_index = random.randrange(class_count)
+       label_name = list(image_lists.keys())[label_index]
+       image_index = random.randrange(MAX_NUM_IMAGES_PER_CLASS + 1)
+       image_name = get_image_path(image_lists, label_name, image_index,
+                                   image_dir, category)
+       bottleneck = get_or_create_bottleneck(
+           sess, image_lists, label_name, image_index, image_dir, category,
+           bottleneck_dir, jpeg_data_tensor, decoded_image_tensor,
+           resized_input_tensor, bottleneck_tensor, module_name)
+       bottlenecks.append(bottleneck)
+       ground_truths.append(label_index)
+       filenames.append(image_name)
+   else:
+     # Retrieve all bottlenecks.
+     for label_index, label_name in enumerate(image_lists.keys()):
+       for image_index, image_name in enumerate(
+           image_lists[label_name][category]):
+         image_name = get_image_path(image_lists, label_name, image_index,
+                                     image_dir, category)
+         bottleneck = get_or_create_bottleneck(
+             sess, image_lists, label_name, image_index, image_dir, category,
+             bottleneck_dir, jpeg_data_tensor, decoded_image_tensor,
+             resized_input_tensor, bottleneck_tensor, module_name)
+         bottlenecks.append(bottleneck)
+         ground_truths.append(label_index)
+         filenames.append(image_name)
+   return bottlenecks, ground_truths, filenames
+ 
+ 
+ def get_random_distorted_bottlenecks(
+     sess, image_lists, how_many, category, image_dir, input_jpeg_tensor,
+     distorted_image, resized_input_tensor, bottleneck_tensor):
+   """Retrieves bottleneck values for training images, after distortions.
+ 
+   If we're training with distortions like crops, scales, or flips, we have to
+   recalculate the full model for every image, and so we can't use cached
+   bottleneck values. Instead we find random images for the requested category,
+   run them through the distortion graph, and then the full graph to get the
+   bottleneck results for each.
+ 
+   Args:
+     sess: Current TensorFlow Session.
+     image_lists: OrderedDict of training images for each label.
+     how_many: The integer number of bottleneck values to return.
+     category: Name string of which set of images to fetch - training, testing,
+     or validation.
+     image_dir: Root folder string of the subfolders containing the training
+     images.
+     input_jpeg_tensor: The input layer we feed the image data to.
+     distorted_image: The output node of the distortion graph.
+     resized_input_tensor: The input node of the recognition graph.
+     bottleneck_tensor: The bottleneck output layer of the CNN graph.
+ 
+   Returns:
+     List of bottleneck arrays and their corresponding ground truths.
+   """
+   class_count = len(image_lists.keys())
+   bottlenecks = []
+   ground_truths = []
+   for unused_i in range(how_many):
+     label_index = random.randrange(class_count)
+     label_name = list(image_lists.keys())[label_index]
+     image_index = random.randrange(MAX_NUM_IMAGES_PER_CLASS + 1)
+     image_path = get_image_path(image_lists, label_name, image_index, image_dir,
+                                 category)
+     if not tf.gfile.Exists(image_path):
+       logging.fatal('File does not exist %s', image_path)
+     jpeg_data = tf.gfile.GFile(image_path, 'rb').read()
+     # Note that we materialize the distorted_image_data as a numpy array before
+     # sending running inference on the image. This involves 2 memory copies and
+     # might be optimized in other implementations.
+     distorted_image_data = sess.run(distorted_image,
+                                     {input_jpeg_tensor: jpeg_data})
+     bottleneck_values = sess.run(bottleneck_tensor,
+                                  {resized_input_tensor: distorted_image_data})
+     bottleneck_values = np.squeeze(bottleneck_values)
+     bottlenecks.append(bottleneck_values)
+     ground_truths.append(label_index)
+   return bottlenecks, ground_truths
+ 
+ 
+ def should_distort_images(flip_left_right, random_crop, random_scale,
+                           random_brightness):
+   """Whether any distortions are enabled, from the input flags.
+ 
+   Args:
+     flip_left_right: Boolean whether to randomly mirror images horizontally.
+     random_crop: Integer percentage setting the total margin used around the
+     crop box.
+     random_scale: Integer percentage of how much to vary the scale by.
+     random_brightness: Integer range to randomly multiply the pixel values by.
+ 
+   Returns:
+     Boolean value indicating whether any distortions should be applied.
+   """
+   return (flip_left_right or (random_crop != 0) or (random_scale != 0) or
+           (random_brightness != 0))
+ 
+ 
+ def add_input_distortions(flip_left_right, random_crop, random_scale,
+                           random_brightness, module_spec):
+   """Creates the operations to apply the specified distortions.
+ 
+   During training it can help to improve the results if we run the images
+   through simple distortions like crops, scales, and flips. These reflect the
+   kind of variations we expect in the real world, and so can help train the
+   model to cope with natural data more effectively. Here we take the supplied
+   parameters and construct a network of operations to apply them to an image.
+ 
+   Cropping
+   ~~~~~~~~
+ 
+   Cropping is done by placing a bounding box at a random position in the full
+   image. The cropping parameter controls the size of that box relative to the
+   input image. If it's zero, then the box is the same size as the input and no
+   cropping is performed. If the value is 50%, then the crop box will be half the
+   width and height of the input. In a diagram it looks like this:
+ 
+   <       width         >
+   +---------------------+
+   |                     |
+   |   width - crop%     |
+   |    <      >         |
+   |    +------+         |
+   |    |      |         |
+   |    |      |         |
+   |    |      |         |
+   |    +------+         |
+   |                     |
+   |                     |
+   +---------------------+
+ 
+   Scaling
+   ~~~~~~~
+ 
+   Scaling is a lot like cropping, except that the bounding box is always
+   centered and its size varies randomly within the given range. For example if
+   the scale percentage is zero, then the bounding box is the same size as the
+   input and no scaling is applied. If it's 50%, then the bounding box will be in
+   a random range between half the width and height and full size.
+ 
+   Args:
+     flip_left_right: Boolean whether to randomly mirror images horizontally.
+     random_crop: Integer percentage setting the total margin used around the
+       crop box.
+     random_scale: Integer percentage of how much to vary the scale by.
+     random_brightness: Integer percentage; pixel values are multiplied by a
+       random factor drawn between (1 - random_brightness/100) and
+       (1 + random_brightness/100).
+     module_spec: The hub.ModuleSpec for the image module being used.
+ 
+   Returns:
+     The jpeg input layer and the distorted result tensor. The result tensor
+     has a leading dimension of size 1 added in front of the distorted image.
+   """
+   # Target image size and channel count are dictated by the module.
+   input_height, input_width = hub.get_expected_image_size(module_spec)
+   input_depth = hub.get_num_image_channels(module_spec)
+   jpeg_data = tf.placeholder(tf.string, name='DistortJPGInput')
+   decoded_image = tf.image.decode_jpeg(jpeg_data, channels=input_depth)
+   # Convert from full range of uint8 to range [0,1] of float32.
+   decoded_image_as_float = tf.image.convert_image_dtype(decoded_image,
+                                                         tf.float32)
+   # resize_bilinear below is fed a 4-D tensor, so add a leading dimension.
+   decoded_image_4d = tf.expand_dims(decoded_image_as_float, 0)
+   # The image is first resized to (module input size * scale), where scale is
+   # the fixed crop margin times a random factor in [1, 1 + random_scale/100].
+   margin_scale = 1.0 + (random_crop / 100.0)
+   resize_scale = 1.0 + (random_scale / 100.0)
+   margin_scale_value = tf.constant(margin_scale)
+   resize_scale_value = tf.random_uniform(shape=[],
+                                          minval=1.0,
+                                          maxval=resize_scale)
+   scale_value = tf.multiply(margin_scale_value, resize_scale_value)
+   precrop_width = tf.multiply(scale_value, input_width)
+   precrop_height = tf.multiply(scale_value, input_height)
+   precrop_shape = tf.stack([precrop_height, precrop_width])
+   precrop_shape_as_int = tf.cast(precrop_shape, dtype=tf.int32)
+   precropped_image = tf.image.resize_bilinear(decoded_image_4d,
+                                               precrop_shape_as_int)
+   # Drop the leading dimension again, then cut a randomly positioned window
+   # of exactly the module's input size out of the enlarged image.
+   precropped_image_3d = tf.squeeze(precropped_image, axis=[0])
+   cropped_image = tf.random_crop(precropped_image_3d,
+                                  [input_height, input_width, input_depth])
+   if flip_left_right:
+     flipped_image = tf.image.random_flip_left_right(cropped_image)
+   else:
+     flipped_image = cropped_image
+   # Brightness is a single random multiplier applied to the whole image.
+   brightness_min = 1.0 - (random_brightness / 100.0)
+   brightness_max = 1.0 + (random_brightness / 100.0)
+   brightness_value = tf.random_uniform(shape=[],
+                                        minval=brightness_min,
+                                        maxval=brightness_max)
+   brightened_image = tf.multiply(flipped_image, brightness_value)
+   # Re-add the leading dimension for the final, named output tensor.
+   distort_result = tf.expand_dims(brightened_image, 0, name='DistortResult')
+   return jpeg_data, distort_result
+ 
+ 
+ def variable_summaries(var):
+   """Attach a lot of summaries to a Tensor (for TensorBoard visualization)."""
+   with tf.name_scope('summaries'):
+     mean = tf.reduce_mean(var)
+     tf.summary.scalar('mean', mean)
+     with tf.name_scope('stddev'):
+       stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
+     tf.summary.scalar('stddev', stddev)
+     tf.summary.scalar('max', tf.reduce_max(var))
+     tf.summary.scalar('min', tf.reduce_min(var))
+     tf.summary.histogram('histogram', var)
+ 
+ 
+ def add_final_retrain_ops(class_count, final_tensor_name, bottleneck_tensor,
+                           quantize_layer, is_training):
+   """Adds a new softmax and fully-connected layer for training and eval.
+ 
+   We need to retrain the top layer to identify our new classes, so this function
+   adds the right operations to the graph, along with some variables to hold the
+   weights, and then sets up all the gradients for the backward pass.
+ 
+   The set up for the softmax and fully-connected layers is based on:
+   https://www.tensorflow.org/tutorials/mnist/beginners/index.html
+ 
+   Args:
+     class_count: Integer of how many categories of things we're trying to
+         recognize.
+     final_tensor_name: Name string for the new final node that produces results.
+     bottleneck_tensor: The output of the main CNN graph.
+     quantize_layer: Boolean, specifying whether the newly added layer should be
+         instrumented for quantization with TF-Lite.
+     is_training: Boolean, specifying whether the newly add layer is for training
+         or eval.
+ 
+   Returns:
+     A 5-tuple (train_step, cross_entropy_mean, bottleneck_input,
+     ground_truth_input, final_tensor). When `is_training` is False no loss or
+     optimizer is built and the first two entries are None.
+   """
+   # The bottleneck tensor is unpacked as exactly two dimensions:
+   # (batch, feature size). The batch dimension must be left unspecified.
+   batch_size, bottleneck_tensor_size = bottleneck_tensor.get_shape().as_list()
+   assert batch_size is None, 'We want to work with arbitrary batch size.'
+   with tf.name_scope('input'):
+     # Defaults to the module's own bottleneck output, but precomputed
+     # bottleneck values can be fed in its place.
+     bottleneck_input = tf.placeholder_with_default(
+         bottleneck_tensor,
+         shape=[batch_size, bottleneck_tensor_size],
+         name='BottleneckInputPlaceholder')
+ 
+     # Labels are fed as int64 class indices, one per example.
+     ground_truth_input = tf.placeholder(
+         tf.int64, [batch_size], name='GroundTruthInput')
+ 
+   # Organizing the following ops so they are easier to see in TensorBoard.
+   layer_name = 'final_retrain_ops'
+   with tf.name_scope(layer_name):
+     with tf.name_scope('weights'):
+       initial_value = tf.truncated_normal(
+           [bottleneck_tensor_size, class_count], stddev=0.001)
+       layer_weights = tf.Variable(initial_value, name='final_weights')
+       variable_summaries(layer_weights)
+ 
+     with tf.name_scope('biases'):
+       layer_biases = tf.Variable(tf.zeros([class_count]), name='final_biases')
+       variable_summaries(layer_biases)
+ 
+     with tf.name_scope('Wx_plus_b'):
+       logits = tf.matmul(bottleneck_input, layer_weights) + layer_biases
+       tf.summary.histogram('pre_activations', logits)
+ 
+   # Created outside the layer's name scope, so the op is named exactly
+   # `final_tensor_name`.
+   final_tensor = tf.nn.softmax(logits, name=final_tensor_name)
+ 
+   # The tf.contrib.quantize functions rewrite the graph in place for
+   # quantization. The imported model graph has already been rewritten, so upon
+   # calling these rewrites, only the newly added final layer will be
+   # transformed.
+   if quantize_layer:
+     if is_training:
+       contrib_quantize.create_training_graph()
+     else:
+       contrib_quantize.create_eval_graph()
+ 
+   tf.summary.histogram('activations', final_tensor)
+ 
+   # If this is an eval graph, we don't need to add loss ops or an optimizer.
+   if not is_training:
+     return None, None, bottleneck_input, ground_truth_input, final_tensor
+ 
+   with tf.name_scope('cross_entropy'):
+     # The loss takes the raw logits, not the softmax output.
+     cross_entropy_mean = tf.losses.sparse_softmax_cross_entropy(
+         labels=ground_truth_input, logits=logits)
+ 
+   tf.summary.scalar('cross_entropy', cross_entropy_mean)
+ 
+   with tf.name_scope('train'):
+     # Plain gradient descent at the rate given by --learning_rate.
+     optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
+     train_step = optimizer.minimize(cross_entropy_mean)
+ 
+   return (train_step, cross_entropy_mean, bottleneck_input, ground_truth_input,
+           final_tensor)
+ 
+ 
+ def add_evaluation_step(result_tensor, ground_truth_tensor):
+   """Inserts the operations we need to evaluate the accuracy of our results.
+ 
+   Args:
+     result_tensor: The new final node that produces results.
+     ground_truth_tensor: The node we feed ground truth data
+     into.
+ 
+   Returns:
+     Tuple of (evaluation step, prediction).
+   """
+   with tf.name_scope('accuracy'):
+     with tf.name_scope('correct_prediction'):
+       prediction = tf.argmax(result_tensor, 1)
+       correct_prediction = tf.equal(prediction, ground_truth_tensor)
+     with tf.name_scope('accuracy'):
+       evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
+   tf.summary.scalar('accuracy', evaluation_step)
+   return evaluation_step, prediction
+ 
+ 
+ def run_final_eval(train_session, module_spec, class_count, image_lists,
+                    jpeg_data_tensor, decoded_image_tensor,
+                    resized_image_tensor, bottleneck_tensor):
+   """Runs a final evaluation on an eval graph using the test data set.
+ 
+   Args:
+     train_session: Session for the train graph with the tensors below.
+     module_spec: The hub.ModuleSpec for the image module being used.
+     class_count: Number of classes
+     image_lists: OrderedDict of training images for each label.
+     jpeg_data_tensor: The layer to feed jpeg image data into.
+     decoded_image_tensor: The output of decoding and resizing the image.
+     resized_image_tensor: The input node of the recognition graph.
+     bottleneck_tensor: The bottleneck output layer of the CNN graph.
+   """
+   test_bottlenecks, test_ground_truth, test_filenames = (
+       get_random_cached_bottlenecks(train_session, image_lists,
+                                     FLAGS.test_batch_size,
+                                     'testing', FLAGS.bottleneck_dir,
+                                     FLAGS.image_dir, jpeg_data_tensor,
+                                     decoded_image_tensor, resized_image_tensor,
+                                     bottleneck_tensor, FLAGS.tfhub_module))
+ 
+   (eval_session, _, bottleneck_input, ground_truth_input, evaluation_step,
+    prediction) = build_eval_session(module_spec, class_count)
+   test_accuracy, predictions = eval_session.run(
+       [evaluation_step, prediction],
+       feed_dict={
+           bottleneck_input: test_bottlenecks,
+           ground_truth_input: test_ground_truth
+       })
+   logging.info('Final test accuracy = %.1f%% (N=%d)',
+                test_accuracy * 100, len(test_bottlenecks))
+ 
+   if FLAGS.print_misclassified_test_images:
+     logging.info('=== MISCLASSIFIED TEST IMAGES ===')
+     for i, test_filename in enumerate(test_filenames):
+       if predictions[i] != test_ground_truth[i]:
+         logging.info('%70s  %s', test_filename,
+                      list(image_lists.keys())[predictions[i]])
+ 
+ 
+ def build_eval_session(module_spec, class_count):
+   """Builds an restored eval session without train operations for exporting.
+ 
+   Args:
+     module_spec: The hub.ModuleSpec for the image module being used.
+     class_count: Number of classes
+ 
+   Returns:
+     Eval session containing the restored eval graph.
+     The bottleneck input, ground truth, eval step, and prediction tensors.
+   """
+   # If quantized, we need to create the correct eval graph for exporting.
+   eval_graph, bottleneck_tensor, resized_input_tensor, wants_quantization = (
+       create_module_graph(module_spec))
+ 
+   eval_sess = tf.Session(graph=eval_graph)
+   with eval_graph.as_default():
+     # Add the new layer for exporting.
+     (_, _, bottleneck_input,
+      ground_truth_input, final_tensor) = add_final_retrain_ops(
+          class_count, FLAGS.final_tensor_name, bottleneck_tensor,
+          wants_quantization, is_training=False)
+ 
+     # Now we need to restore the values from the training graph to the eval
+     # graph.
+     tf.train.Saver().restore(eval_sess, FLAGS.checkpoint_path)
+ 
+     evaluation_step, prediction = add_evaluation_step(final_tensor,
+                                                       ground_truth_input)
+ 
+   return (eval_sess, resized_input_tensor, bottleneck_input, ground_truth_input,
+           evaluation_step, prediction)
+ 
+ 
+ def save_graph_to_file(graph_file_name, module_spec, class_count):
+   """Saves an graph to file, creating a valid quantized one if necessary."""
+   sess, _, _, _, _, _ = build_eval_session(module_spec, class_count)
+   graph = sess.graph
+ 
+   output_graph_def = tf.graph_util.convert_variables_to_constants(
+       sess, graph.as_graph_def(), [FLAGS.final_tensor_name])
+ 
+   with tf.gfile.GFile(graph_file_name, 'wb') as f:
+     f.write(output_graph_def.SerializeToString())
+ 
+ 
+ def prepare_file_system():
+   # Set up the directory we'll write summaries to for TensorBoard
+   if tf.gfile.Exists(FLAGS.summaries_dir):
+     tf.gfile.DeleteRecursively(FLAGS.summaries_dir)
+   tf.gfile.MakeDirs(FLAGS.summaries_dir)
+   if FLAGS.intermediate_store_frequency > 0:
+     ensure_dir_exists(FLAGS.intermediate_output_graphs_dir)
+   return
+ 
+ 
+ def add_jpeg_decoding(module_spec):
+   """Adds operations that perform JPEG decoding and resizing to the graph..
+ 
+   Args:
+     module_spec: The hub.ModuleSpec for the image module being used.
+ 
+   Returns:
+     Tensors for the node to feed JPEG data into, and the output of the
+       preprocessing steps.
+   """
+   input_height, input_width = hub.get_expected_image_size(module_spec)
+   input_depth = hub.get_num_image_channels(module_spec)
+   jpeg_data = tf.placeholder(tf.string, name='DecodeJPGInput')
+   decoded_image = tf.image.decode_jpeg(jpeg_data, channels=input_depth)
+   # Convert from full range of uint8 to range [0,1] of float32.
+   decoded_image_as_float = tf.image.convert_image_dtype(decoded_image,
+                                                         tf.float32)
+   decoded_image_4d = tf.expand_dims(decoded_image_as_float, 0)
+   resize_shape = tf.stack([input_height, input_width])
+   resize_shape_as_int = tf.cast(resize_shape, dtype=tf.int32)
+   resized_image = tf.image.resize_bilinear(decoded_image_4d,
+                                            resize_shape_as_int)
+   return jpeg_data, resized_image
+ 
+ 
+ def export_model(module_spec, class_count, saved_model_dir):
+   """Exports model for serving.
+ 
+   Args:
+     module_spec: The hub.ModuleSpec for the image module being used.
+     class_count: The number of classes.
+     saved_model_dir: Directory in which to save exported model and variables.
+   """
+   # The SavedModel should hold the eval graph.
+   sess, in_image, _, _, _, _ = build_eval_session(module_spec, class_count)
+   with sess.graph.as_default() as graph:
+     tf.saved_model.simple_save(
+         sess,
+         saved_model_dir,
+         inputs={'image': in_image},
+         outputs={'prediction': graph.get_tensor_by_name('final_result:0')},
+         legacy_init_op=tf.group(tf.tables_initializer(), name='legacy_init_op')
+     )
+ 
+ 
+ def logging_level_verbosity(logging_verbosity):
+   """Converts logging_level into TensorFlow logging verbosity value.
+ 
+   Args:
+     logging_verbosity: String value representing logging level: 'DEBUG', 'INFO',
+     'WARN', 'ERROR', 'FATAL'
+   """
+   name_to_level = {
+       'FATAL': logging.FATAL,
+       'ERROR': logging.ERROR,
+       'WARN': logging.WARN,
+       'INFO': logging.INFO,
+       'DEBUG': logging.DEBUG
+   }
+ 
+   try:
+     return name_to_level[logging_verbosity]
+   except Exception as e:
+     raise RuntimeError('Not supported logs verbosity (%s). Use one of %s.' %
+                        (str(e), list(name_to_level)))
+ 
+ 
+ def main(_):
+   """Retrains the final layer on FLAGS.image_dir and writes out the results.
+ 
+   Builds the module graph plus a new final layer, runs
+   FLAGS.how_many_training_steps training steps (periodically logging train and
+   validation accuracy), then saves a checkpoint, runs a final evaluation on
+   the test set, and writes the graph with constant weights, the labels file
+   and, if requested, a SavedModel.
+ 
+   Args:
+     _: Unused positional argument supplied by tf.app.run.
+ 
+   Returns:
+     -1 if --image_dir is unset or fewer than two classes of images are found;
+     None otherwise.
+   """
+   # Needed to make sure the logging output is visible.
+   # See https://github.com/tensorflow/tensorflow/issues/3047
+   logging_verbosity = logging_level_verbosity(FLAGS.logging_verbosity)
+   logging.set_verbosity(logging_verbosity)
+ 
+   if not FLAGS.image_dir:
+     logging.error('Must set flag --image_dir.')
+     return -1
+ 
+   # Prepare necessary directories that can be used during training
+   prepare_file_system()
+ 
+   # Look at the folder structure, and create lists of all the images.
+   image_lists = create_image_lists(FLAGS.image_dir, FLAGS.testing_percentage,
+                                    FLAGS.validation_percentage)
+   class_count = len(image_lists.keys())
+   if class_count == 0:
+     logging.error('No valid folders of images found at %s', FLAGS.image_dir)
+     return -1
+   if class_count == 1:
+     logging.error('Only one valid folder of images found at %s '
+                   ' - multiple classes are needed for classification.',
+                   FLAGS.image_dir)
+     return -1
+ 
+   # See if the command-line flags mean we're applying any distortions.
+   do_distort_images = should_distort_images(
+       FLAGS.flip_left_right, FLAGS.random_crop, FLAGS.random_scale,
+       FLAGS.random_brightness)
+ 
+   # Set up the pre-trained graph.
+   module_spec = hub.load_module_spec(FLAGS.tfhub_module)
+   graph, bottleneck_tensor, resized_image_tensor, wants_quantization = (
+       create_module_graph(module_spec))
+ 
+   # Add the new layer that we'll be training.
+   with graph.as_default():
+     (train_step, cross_entropy, bottleneck_input,
+      ground_truth_input, final_tensor) = add_final_retrain_ops(
+          class_count, FLAGS.final_tensor_name, bottleneck_tensor,
+          wants_quantization, is_training=True)
+ 
+   with tf.Session(graph=graph) as sess:
+     # Initialize all weights: for the module to their pretrained values,
+     # and for the newly added retraining layer to random initial values.
+     init = tf.global_variables_initializer()
+     sess.run(init)
+ 
+     # Set up the image decoding sub-graph.
+     jpeg_data_tensor, decoded_image_tensor = add_jpeg_decoding(module_spec)
+ 
+     if do_distort_images:
+       # We will be applying distortions, so set up the operations we'll need.
+       (distorted_jpeg_data_tensor,
+        distorted_image_tensor) = add_input_distortions(
+            FLAGS.flip_left_right, FLAGS.random_crop, FLAGS.random_scale,
+            FLAGS.random_brightness, module_spec)
+     else:
+       # We'll make sure we've calculated the 'bottleneck' image summaries and
+       # cached them on disk.
+       cache_bottlenecks(sess, image_lists, FLAGS.image_dir,
+                         FLAGS.bottleneck_dir, jpeg_data_tensor,
+                         decoded_image_tensor, resized_image_tensor,
+                         bottleneck_tensor, FLAGS.tfhub_module)
+ 
+     # Create the operations we need to evaluate the accuracy of our new layer.
+     evaluation_step, _ = add_evaluation_step(final_tensor, ground_truth_input)
+ 
+     # Merge all the summaries and write them out to the summaries_dir
+     merged = tf.summary.merge_all()
+     train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train',
+                                          sess.graph)
+ 
+     validation_writer = tf.summary.FileWriter(
+         FLAGS.summaries_dir + '/validation')
+ 
+     # Create a train saver that is used to restore values into an eval graph
+     # when exporting models.
+     train_saver = tf.train.Saver()
+ 
+     # Run the training for as many cycles as requested on the command line.
+     for i in range(FLAGS.how_many_training_steps):
+       # Get a batch of input bottleneck values, either calculated fresh every
+       # time with distortions applied, or from the cache stored on disk.
+       if do_distort_images:
+         (train_bottlenecks,
+          train_ground_truth) = get_random_distorted_bottlenecks(
+              sess, image_lists, FLAGS.train_batch_size, 'training',
+              FLAGS.image_dir, distorted_jpeg_data_tensor,
+              distorted_image_tensor, resized_image_tensor, bottleneck_tensor)
+       else:
+         (train_bottlenecks,
+          train_ground_truth, _) = get_random_cached_bottlenecks(
+              sess, image_lists, FLAGS.train_batch_size, 'training',
+              FLAGS.bottleneck_dir, FLAGS.image_dir, jpeg_data_tensor,
+              decoded_image_tensor, resized_image_tensor, bottleneck_tensor,
+              FLAGS.tfhub_module)
+       # Feed the bottlenecks and ground truth into the graph, and run a training
+       # step. Capture training summaries for TensorBoard with the `merged` op.
+       train_summary, _ = sess.run(
+           [merged, train_step],
+           feed_dict={bottleneck_input: train_bottlenecks,
+                      ground_truth_input: train_ground_truth})
+       train_writer.add_summary(train_summary, i)
+ 
+       # Every so often, print out how well the graph is training.
+       is_last_step = (i + 1 == FLAGS.how_many_training_steps)
+       if (i % FLAGS.eval_step_interval) == 0 or is_last_step:
+         # Accuracy and loss are measured on the batch that was just trained on.
+         train_accuracy, cross_entropy_value = sess.run(
+             [evaluation_step, cross_entropy],
+             feed_dict={bottleneck_input: train_bottlenecks,
+                        ground_truth_input: train_ground_truth})
+         logging.info('%s: Step %d: Train accuracy = %.1f%%',
+                      datetime.now(), i, train_accuracy * 100)
+         logging.info('%s: Step %d: Cross entropy = %f',
+                      datetime.now(), i, cross_entropy_value)
+         # TODO: Make this use an eval graph, to avoid quantization
+         # moving averages being updated by the validation set, though in
+         # practice this makes a negligible difference.
+         validation_bottlenecks, validation_ground_truth, _ = (
+             get_random_cached_bottlenecks(
+                 sess, image_lists, FLAGS.validation_batch_size, 'validation',
+                 FLAGS.bottleneck_dir, FLAGS.image_dir, jpeg_data_tensor,
+                 decoded_image_tensor, resized_image_tensor, bottleneck_tensor,
+                 FLAGS.tfhub_module))
+         # Run a validation step and capture training summaries for TensorBoard
+         # with the `merged` op.
+         validation_summary, validation_accuracy = sess.run(
+             [merged, evaluation_step],
+             feed_dict={bottleneck_input: validation_bottlenecks,
+                        ground_truth_input: validation_ground_truth})
+         validation_writer.add_summary(validation_summary, i)
+         logging.info('%s: Step %d: Validation accuracy = %.1f%% (N=%d)',
+                      datetime.now(), i, validation_accuracy * 100,
+                      len(validation_bottlenecks))
+ 
+       # Store intermediate results
+       intermediate_frequency = FLAGS.intermediate_store_frequency
+ 
+       # Step 0 is skipped, so the first intermediate graph is written after
+       # `intermediate_frequency` steps.
+       if (intermediate_frequency > 0 and (i % intermediate_frequency == 0)
+           and i > 0):
+         # If we want to do an intermediate save, save a checkpoint of the train
+         # graph, to restore into the eval graph.
+         train_saver.save(sess, FLAGS.checkpoint_path)
+         # NOTE: plain string concatenation - the directory flag has to end with
+         # a path separator (its default value does).
+         intermediate_file_name = (FLAGS.intermediate_output_graphs_dir +
+                                   'intermediate_' + str(i) + '.pb')
+         logging.info('Save intermediate result to : %s', intermediate_file_name)
+         save_graph_to_file(intermediate_file_name, module_spec,
+                            class_count)
+ 
+     # After training is complete, force one last save of the train checkpoint.
+     train_saver.save(sess, FLAGS.checkpoint_path)
+ 
+     # We've completed all our training, so run a final test evaluation on
+     # some new images we haven't used before.
+     run_final_eval(sess, module_spec, class_count, image_lists,
+                    jpeg_data_tensor, decoded_image_tensor, resized_image_tensor,
+                    bottleneck_tensor)
+ 
+     # Write out the trained graph and labels with the weights stored as
+     # constants.
+     logging.info('Save final result to : %s', FLAGS.output_graph)
+     if wants_quantization:
+       logging.info('The model is instrumented for quantization with TF-Lite')
+     save_graph_to_file(FLAGS.output_graph, module_spec, class_count)
+     # One label per line, in the key order of image_lists.
+     with tf.gfile.GFile(FLAGS.output_labels, 'w') as f:
+       f.write('\n'.join(image_lists.keys()) + '\n')
+ 
+     if FLAGS.saved_model_dir:
+       export_model(module_spec, class_count, FLAGS.saved_model_dir)
+ 
+ 
+ # Script entry point: declare the command-line flags, parse them into the
+ # module-level FLAGS object, and hand control to main() through tf.app.run.
+ if __name__ == '__main__':
+   parser = argparse.ArgumentParser()
+   # --- Input and output locations ---
+   parser.add_argument(
+       '--image_dir',
+       type=str,
+       default='',
+       help='Path to folders of labeled images.'
+   )
+   parser.add_argument(
+       '--output_graph',
+       type=str,
+       default='/tmp/output_graph.pb',
+       help='Where to save the trained graph.'
+   )
+   parser.add_argument(
+       '--intermediate_output_graphs_dir',
+       type=str,
+       default='/tmp/intermediate_graph/',
+       help='Where to save the intermediate graphs.'
+   )
+   parser.add_argument(
+       '--intermediate_store_frequency',
+       type=int,
+       default=0,
+       help="""\
+          How many steps to store intermediate graph. If "0" then will not
+          store.\
+       """
+   )
+   parser.add_argument(
+       '--output_labels',
+       type=str,
+       default='/tmp/output_labels.txt',
+       help='Where to save the trained graph\'s labels.'
+   )
+   parser.add_argument(
+       '--summaries_dir',
+       type=str,
+       default='/tmp/retrain_logs',
+       help='Where to save summary logs for TensorBoard.'
+   )
+   # --- Training schedule and data split ---
+   parser.add_argument(
+       '--how_many_training_steps',
+       type=int,
+       default=4000,
+       help='How many training steps to run before ending.'
+   )
+   parser.add_argument(
+       '--learning_rate',
+       type=float,
+       default=0.01,
+       help='How large a learning rate to use when training.'
+   )
+   parser.add_argument(
+       '--testing_percentage',
+       type=int,
+       default=10,
+       help='What percentage of images to use as a test set.'
+   )
+   parser.add_argument(
+       '--validation_percentage',
+       type=int,
+       default=10,
+       help='What percentage of images to use as a validation set.'
+   )
+   parser.add_argument(
+       '--eval_step_interval',
+       type=int,
+       default=10,
+       help='How often to evaluate the training results.'
+   )
+   # --- Batch sizes ---
+   parser.add_argument(
+       '--train_batch_size',
+       type=int,
+       default=100,
+       help='How many images to train on at a time.'
+   )
+   parser.add_argument(
+       '--test_batch_size',
+       type=int,
+       default=-1,
+       help="""\
+       How many images to test on. This test set is only used once, to evaluate
+       the final accuracy of the model after training completes.
+       A value of -1 causes the entire test set to be used, which leads to more
+       stable results across runs.\
+       """
+   )
+   parser.add_argument(
+       '--validation_batch_size',
+       type=int,
+       default=100,
+       help="""\
+       How many images to use in an evaluation batch. This validation set is
+       used much more often than the test set, and is an early indicator of how
+       accurate the model is during training.
+       A value of -1 causes the entire validation set to be used, which leads to
+       more stable results across training iterations, but may be slower on large
+       training sets.\
+       """
+   )
+   parser.add_argument(
+       '--print_misclassified_test_images',
+       default=False,
+       help="""\
+       Whether to print out a list of all misclassified test images.\
+       """,
+       action='store_true'
+   )
+   parser.add_argument(
+       '--bottleneck_dir',
+       type=str,
+       default='/tmp/bottleneck',
+       help='Path to cache bottleneck layer values as files.'
+   )
+   parser.add_argument(
+       '--final_tensor_name',
+       type=str,
+       default='final_result',
+       help="""\
+       The name of the output classification layer in the retrained graph.\
+       """
+   )
+   # --- Optional input distortions (all disabled by default) ---
+   parser.add_argument(
+       '--flip_left_right',
+       default=False,
+       help="""\
+       Whether to randomly flip half of the training images horizontally.\
+       """,
+       action='store_true'
+   )
+   parser.add_argument(
+       '--random_crop',
+       type=int,
+       default=0,
+       help="""\
+       A percentage determining how much of a margin to randomly crop off the
+       training images.\
+       """
+   )
+   parser.add_argument(
+       '--random_scale',
+       type=int,
+       default=0,
+       help="""\
+       A percentage determining how much to randomly scale up the size of the
+       training images by.\
+       """
+   )
+   parser.add_argument(
+       '--random_brightness',
+       type=int,
+       default=0,
+       help="""\
+       A percentage determining how much to randomly multiply the training image
+       input pixels up or down by.\
+       """
+   )
+   # --- Module selection, export, logging and checkpointing ---
+   parser.add_argument(
+       '--tfhub_module',
+       type=str,
+       default=(
+           'https://tfhub.dev/google/imagenet/inception_v3/feature_vector/3'),
+       help="""\
+       Which TensorFlow Hub module to use. For more options,
+       search https://tfhub.dev for image feature vector modules.\
+       """)
+   parser.add_argument(
+       '--saved_model_dir',
+       type=str,
+       default='',
+       help='Where to save the exported graph.')
+   parser.add_argument(
+       '--logging_verbosity',
+       type=str,
+       default='INFO',
+       choices=['DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL'],
+       help='How much logging output should be produced.')
+   parser.add_argument(
+       '--checkpoint_path',
+       type=str,
+       default='/tmp/_retrain_checkpoint',
+       help='Where to save checkpoint files.'
+   )
+   # Arguments argparse does not recognize are passed on to tf.app.run together
+   # with the program name.
+   FLAGS, unparsed = parser.parse_known_args()
+   tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
--- a/tensorflow/retrain_run_inference.py 0 → 100644
View file @dfc01a8
+++ b/tensorflow/retrain_run_inference.py 0 → 100644
View file @dfc01a8
+ # -*- coding: utf-8 -*-
+ 
+ """Inception v3 architecture 모델을 retraining한 모델을 이용해서 이미지에 대한 추론(inference)을 진행하는 예제"""
+ 
+ import numpy as np
+ import tensorflow as tf
+ 
+ imagePath = '/tmp/test_chartreux.jpg'                                      # Path of the image to run inference on
+ modelFullPath = '/tmp/output_graph.pb'                                      # Path of the graph file to load
+ labelsFullPath = '/tmp/output_labels.txt'                                   # Path of the labels file to load
+ 
+ 
+ def create_graph():
+     """저장된(saved) GraphDef 파일로부터 graph를 생성하고 saver를 반환한다."""
+     # 저장된(saved) graph_def.pb로부터 graph를 생성한다.
+     with tf.gfile.FastGFile(modelFullPath, 'rb') as f:
+         graph_def = tf.GraphDef()
+         graph_def.ParseFromString(f.read())
+         _ = tf.import_graph_def(graph_def, name='')
+ 
+ 
+ def run_inference_on_image():
+     answer = None
+ 
+     if not tf.gfile.Exists(imagePath):
+         tf.logging.fatal('File does not exist %s', imagePath)
+         return answer
+ 
+     image_data = tf.gfile.FastGFile(imagePath, 'rb').read()
+ 
+     # 저장된(saved) GraphDef 파일로부터 graph를 생성한다.
+     create_graph()
+ 
+     with tf.Session() as sess:
+ 
+         softmax_tensor = sess.graph.get_tensor_by_name('final_result:0')
+         predictions = sess.run(softmax_tensor,
+                                {'DecodeJpeg/contents:0': image_data})
+         predictions = np.squeeze(predictions)
+ 
+         top_k = predictions.argsort()[-5:][::-1]  # 가장 높은 확률을 가진 5개(top 5)의 예측값(predictions)을 얻는다.
+         f = open(labelsFullPath, 'rb')
+         lines = f.readlines()
+         labels = [str(w).replace("\n", "") for w in lines]
+         for node_id in top_k:
+             human_string = labels[node_id]
+             score = predictions[node_id]
+             print('%s (score = %.5f)' % (human_string, score))
+ 
+         answer = labels[top_k[0]]
+         return answer
+ 
+ 
+ # When executed as a script, run one inference on the configured image.
+ if __name__ == '__main__':
+     run_inference_on_image()
\ No newline at end of file