Backport fixes and improvements from external Keras.
Change: 152198296
fchollet authored and tensorflower-gardener committed Apr 5, 2017
1 parent 8f74d59 commit 9477900
Showing 22 changed files with 424 additions and 150 deletions.
2 changes: 1 addition & 1 deletion tensorflow/contrib/keras/python/keras/__init__.py
@@ -37,4 +37,4 @@
from tensorflow.contrib.keras.python.keras import wrappers


__version__ = '2.0.0-tf'
__version__ = '2.0.2-tf'
24 changes: 17 additions & 7 deletions tensorflow/contrib/keras/python/keras/activations.py
@@ -24,18 +24,28 @@
from tensorflow.contrib.keras.python.keras.utils.generic_utils import deserialize_keras_object


def softmax(x):
def softmax(x, axis=-1):
"""Softmax activation function.
Arguments:
x : Tensor.
axis: Integer, axis along which the softmax normalization is applied.
Returns:
Tensor, output of softmax transformation.
Raises:
ValueError: In case `dim(x) == 1`.
"""
ndim = K.ndim(x)
if ndim == 2:
return K.softmax(x)
elif ndim == 3:
e = K.exp(x - K.max(x, axis=-1, keepdims=True))
s = K.sum(e, axis=-1, keepdims=True)
elif ndim > 2:
e = K.exp(x - K.max(x, axis=axis, keepdims=True))
s = K.sum(e, axis=axis, keepdims=True)
return e / s
else:
raise ValueError('Cannot apply softmax to a tensor '
'that is not 2D or 3D. '
'Here, ndim=' + str(ndim))
raise ValueError('Cannot apply softmax to a tensor that is 1D')
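With the new `axis` argument, softmax is no longer limited to the 2D and 3D special cases and works on any tensor of rank 2 or higher. A minimal NumPy sketch of the same max-shifted normalization (the helper name below is illustrative, not part of the commit):

import numpy as np

def softmax_along_axis(x, axis=-1):
  # Subtract the per-slice max for numerical stability, then normalize.
  e = np.exp(x - np.max(x, axis=axis, keepdims=True))
  return e / np.sum(e, axis=axis, keepdims=True)

scores = np.random.rand(2, 3, 4)            # rank-3 input (the old 3D special case)
probs = softmax_along_axis(scores, axis=1)  # normalize over the middle axis
assert np.allclose(probs.sum(axis=1), 1.0)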


def elu(x, alpha=1.0):
@@ -163,8 +163,8 @@ def ResNet50(include_top=True,
specified in your Keras config file.
Arguments:
include_top: whether to include the 3 fully-connected
layers at the top of the network.
include_top: whether to include the fully-connected
layer at the top of the network.
weights: one of `None` (random initialization)
or "imagenet" (pre-training on ImageNet).
input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
94 changes: 66 additions & 28 deletions tensorflow/contrib/keras/python/keras/backend.py
@@ -22,7 +22,6 @@
from __future__ import print_function

from collections import defaultdict
import errno
import json
import os
import warnings
@@ -270,6 +269,7 @@ def clear_session():
reset_uids()
_SESSION = None
phase = array_ops.placeholder(dtype='bool', name='keras_learning_phase')
_GRAPH_LEARNING_PHASES = {}
_GRAPH_LEARNING_PHASES[ops.get_default_graph()] = phase
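Resetting `_GRAPH_LEARNING_PHASES` here drops stale per-graph learning-phase placeholders together with the session. A small sketch of the observable effect, assuming the backend import path used throughout this diff:

from tensorflow.contrib.keras.python.keras import backend as K

before = K.learning_phase()  # placeholder tied to the current default graph
K.clear_session()            # resets the graph, session, and cached phases
after = K.learning_phase()   # a fresh placeholder in the new default graph
print(before is after)       # False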


@@ -1257,6 +1257,34 @@ def prod(x, axis=None, keepdims=False):
return math_ops.reduce_prod(x, reduction_indices=axis, keep_dims=keepdims)


def cumsum(x, axis=0):
"""Cumulative sum of the values in a tensor, alongside the specified axis.
Arguments:
x: A tensor or variable.
axis: An integer, the axis to compute the sum.
Returns:
A tensor of the cumulative sum of values of `x` along `axis`.
"""
axis = _normalize_axis(axis, ndim(x))
return math_ops.cumsum(x, axis=axis)


def cumprod(x, axis=0):
"""Cumulative product of the values in a tensor, alongside the specified axis.
Arguments:
x: A tensor or variable.
axis: An integer, the axis to compute the product.
Returns:
A tensor of the cumulative product of values of `x` along `axis`.
"""
axis = _normalize_axis(axis, ndim(x))
return math_ops.cumprod(x, axis=axis)
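The two new backend functions are thin wrappers over `tf.cumsum` and `tf.cumprod` after axis normalization; a short usage sketch:

import numpy as np
from tensorflow.contrib.keras.python.keras import backend as K

x = K.variable(np.array([[1., 2., 3.],
                         [4., 5., 6.]]))
print(K.eval(K.cumsum(x, axis=1)))   # [[ 1.  3.  6.], [ 4.  9. 15.]]
print(K.eval(K.cumprod(x, axis=1)))  # [[ 1.  2.  6.], [ 4. 20. 120.]]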


def var(x, axis=None, keepdims=False):
"""Variance of a tensor, alongside the specified axis.
@@ -1330,8 +1358,7 @@ def any(x, axis=None, keepdims=False):
"""
axis = _normalize_axis(axis, ndim(x))
x = math_ops.cast(x, dtypes_module.bool)
x = math_ops.reduce_any(x, reduction_indices=axis, keep_dims=keepdims)
return math_ops.cast(x, dtypes_module.uint8)
return math_ops.reduce_any(x, reduction_indices=axis, keep_dims=keepdims)


def all(x, axis=None, keepdims=False):
@@ -1347,8 +1374,7 @@ def all(x, axis=None, keepdims=False):
"""
axis = _normalize_axis(axis, ndim(x))
x = math_ops.cast(x, dtypes_module.bool)
x = math_ops.reduce_all(x, reduction_indices=axis, keep_dims=keepdims)
return math_ops.cast(x, dtypes_module.uint8)
return math_ops.reduce_all(x, reduction_indices=axis, keep_dims=keepdims)
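With the trailing casts removed, `K.any` and `K.all` now return boolean tensors instead of uint8, for example:

import numpy as np
from tensorflow.contrib.keras.python.keras import backend as K

mask = K.variable(np.array([[0., 1., 0.],
                            [0., 0., 0.]]))
row_in_use = K.any(mask, axis=1)
print(row_in_use.dtype)    # bool rather than uint8
print(K.eval(row_in_use))  # [ True False]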


def argmax(x, axis=-1):
@@ -1645,7 +1671,7 @@ def normalize_batch_in_training(x, gamma, beta, reduction_axes, epsilon=1e-3):
"""
mean, var = nn.moments(
x, reduction_axes, shift=None, name=None, keep_dims=False)
if sorted(reduction_axes) == range(ndim(x))[:-1]:
if sorted(reduction_axes) == list(range(ndim(x)))[:-1]:
normed = nn.batch_normalization(x, mean, var, beta, gamma, epsilon)
else:
# need broadcasting
@@ -2324,8 +2350,8 @@ def rnn(step_function,
(no time dimension),
containing the initial values for the states used in
the step function.
go_backwards: boolean. If True, do the iteration over
the time dimension in reverse order.
go_backwards: boolean. If True, do the iteration over the time
dimension in reverse order and return the reversed sequence.
mask: binary tensor with shape `(samples, time, 1)`,
with a zero for every element that is masked.
constants: a list of constant values passed at each step.
@@ -2414,9 +2440,9 @@ def rnn(step_function,
states = return_states
successive_outputs.append(output)
successive_states.append(states)
last_output = successive_outputs[-1]
new_states = successive_states[-1]
outputs = array_ops.stack(successive_outputs)
last_output = successive_outputs[-1]
new_states = successive_states[-1]
outputs = array_ops.stack(successive_outputs)
else:
for inp in input_list:
output, states = step_function(inp, states + constants)
@@ -3534,19 +3560,19 @@ def ctc_decode(y_pred, input_length, greedy=True, beam_width=100, top_paths=1):
# HIGH ORDER FUNCTIONS


def map_fn(fn, elems, name=None):
def map_fn(fn, elems, name=None, dtype=None):
"""Map the function fn over the elements elems and return the outputs.
Arguments:
fn: Callable that will be called upon each element in elems
elems: tensor
name: A string name for the map node in the graph
dtype: Output data type.
Returns:
Tensor with first dimension equal to the elems and second depending on
fn
Tensor with dtype `dtype`.
"""
return functional_ops.map_fn(fn, elems, name=name)
return functional_ops.map_fn(fn, elems, name=name, dtype=dtype)
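Forwarding `dtype` matters whenever `fn` changes the element type, because `tf.map_fn` cannot infer the output dtype on its own. A sketch under that assumption:

import numpy as np
import tensorflow as tf
from tensorflow.contrib.keras.python.keras import backend as K

x = K.variable(np.array([1.3, 2.7, 3.1]))
# Without dtype, map_fn would assume the output dtype matches x (float).
rounded = K.map_fn(lambda v: K.cast(K.round(v), 'int32'), x, dtype=tf.int32)
print(K.eval(rounded))  # [1 3 3]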


def foldl(fn, elems, initializer=None, name=None):
@@ -3560,7 +3586,7 @@ def foldl(fn, elems, initializer=None, name=None):
name: A string name for the foldl node in the graph
Returns:
Same type and shape as initializer
Tensor with same type and shape as `initializer`.
"""
return functional_ops.foldl(fn, elems, initializer=initializer, name=name)
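For reference, `foldl` reduces over the first dimension; when `initializer` is None the first element seeds the accumulator, so the result keeps its type and shape:

import numpy as np
from tensorflow.contrib.keras.python.keras import backend as K

x = K.variable(np.array([1., 2., 3., 4.]))
total = K.foldl(lambda acc, v: acc + v, x)
print(K.eval(total))  # 10.0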

@@ -3583,27 +3609,39 @@ def foldr(fn, elems, initializer=None, name=None):

# Load Keras default configuration from config file if present.
_keras_base_dir = os.path.expanduser('~')
if not os.access(_keras_base_dir, os.W_OK):
_keras_base_dir = '/tmp'
_keras_dir = os.path.join(_keras_base_dir, '.keras')
if not os.path.exists(_keras_dir):
try:
os.makedirs(_keras_dir)
except OSError as e:
if e.errno == errno.EEXIST:
pass
else:
raise
_config_path = os.path.expanduser(os.path.join(_keras_dir, 'keras.json'))
if os.path.exists(_config_path):
_config = json.load(open(_config_path))
try:
_config = json.load(open(_config_path))
except json.decoder.JSONDecodeError:
_config = {}
_floatx = _config.get('floatx', floatx())
assert _floatx in {'float16', 'float32', 'float64'}
_epsilon = _config.get('epsilon', epsilon())
assert isinstance(_epsilon, float)
_backend = backend()
_image_data_format = _config.get('image_data_format', image_data_format())
assert _image_data_format in {'channels_last', 'channels_first'}
set_floatx(_floatx)
set_epsilon(_epsilon)
set_image_data_format(_image_data_format)

# Save config file.
if os.access(_keras_base_dir, os.W_OK):
if not os.path.exists(_keras_dir):
try:
os.makedirs(_keras_dir)
except OSError:
# Except potential race conditions
# in multi-threaded environments.
pass

if not os.path.exists(_config_path):
_config = {
'floatx': floatx(),
'epsilon': epsilon(),
'backend': 'tensorflow',
'image_data_format': image_data_format()
}
with open(_config_path, 'w') as f:
f.write(json.dumps(_config, indent=4))
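Net effect of the block above: a corrupt config no longer aborts the import, and a default `keras.json` is written only when the base directory is writable and the file does not exist yet. A sketch of inspecting the generated file (the printed defaults mirror the fallbacks used above):

import json
import os

config_path = os.path.expanduser('~/.keras/keras.json')
if os.path.exists(config_path):
  with open(config_path) as f:
    print(json.load(f))
# Typically: {'floatx': 'float32', 'epsilon': 1e-07,
#             'backend': 'tensorflow', 'image_data_format': 'channels_last'}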
30 changes: 23 additions & 7 deletions tensorflow/contrib/keras/python/keras/engine/topology.py
@@ -295,8 +295,14 @@ def __init__(self, **kwargs):
# are only applicable to input layers: do not pass these keywords
# to non-input layers.
allowed_kwargs = {
'input_shape', 'batch_input_shape', 'batch_size', 'dtype', 'name',
'trainable', 'weights'
'input_shape',
'batch_input_shape',
'batch_size',
'dtype',
'name',
'trainable',
'weights',
'input_dtype', # legacy
}
for kwarg in kwargs:
if kwarg not in allowed_kwargs:
@@ -320,8 +326,15 @@ def __init__(self, **kwargs):
batch_size = None
batch_input_shape = (batch_size,) + tuple(kwargs['input_shape'])
self.batch_input_shape = batch_input_shape
dtype = kwargs.get('dtype', K.floatx())

# Set dtype.
dtype = kwargs.get('dtype')
if dtype is None:
dtype = kwargs.get('input_dtype')
if dtype is None:
dtype = K.floatx()
self.dtype = dtype

if 'weights' in kwargs:
self._initial_weights = kwargs['weights']
else:
@@ -485,11 +498,12 @@ def assert_input_compatibility(self, inputs):
': expected shape=' + str(spec.shape) +
', found shape=' + str(x_shape))

def call(self, inputs):
def call(self, inputs, **kwargs): # pylint: disable=unused-argument
"""This is where the layer's logic lives.
Arguments:
inputs: input tensor, or list/tuple of input tensors.
inputs: Input tensor, or list/tuple of input tensors.
**kwargs: Additional keyword arguments.
Returns:
A tensor or list/tuple of tensors.
@@ -518,6 +532,8 @@ def __call__(self, inputs, **kwargs):
ValueError: in case the layer is missing shape information
for its `build` call.
"""
if isinstance(inputs, list):
inputs = inputs[:]
with K.name_scope(self.name):
# Handle laying building (weight creating, input spec locking).
if not self.built:
@@ -1417,7 +1433,7 @@ class Container(Layer):
get_weights
set_weights
get_config
get_output_shape_for
compute_output_shape
# Class Methods
from_config
@@ -2029,7 +2045,7 @@ def _compute_output_shape(self, input_shape):
for i in range(len(input_shapes)):
layer = self.input_layers[i]
input_shape = input_shapes[i]
# It's an input layer: get_output_shape_for is identity,
# It's an input layer: compute_output_shape is identity,
# and there is only one node and one tensor output.
shape_key = layer.name + '_0_0'
layers_to_output_shapes[shape_key] = input_shape
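The `Layer.__init__` change above accepts the legacy Keras 1 `input_dtype` keyword again alongside the Keras 2 `dtype` spelling. A hedged sketch of the two spellings (assuming, as in external Keras, that both resolve to the same `dtype` attribute when an input shape is given):

from tensorflow.contrib.keras.python.keras.layers import Dense

new_style = Dense(4, input_shape=(8,), dtype='float32')
old_style = Dense(4, input_shape=(8,), input_dtype='float32')  # legacy spelling
print(new_style.dtype, old_style.dtype)  # float32 float32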
26 changes: 17 additions & 9 deletions tensorflow/contrib/keras/python/keras/engine/training.py
@@ -733,11 +733,12 @@ def compile(self,
loss_functions = []
for name in self.output_names:
if name not in loss:
warnings.warn('Output "' + name + '" missing from loss dictionary. '
'We assume this was done on purpose, '
'and we will not be expecting '
'any data to be passed to "' + name +
'" during training.')
warnings.warn(
'Output "' + name + '" missing from loss dictionary. '
'We assume this was done on purpose, '
'and we will not be expecting '
'any data to be passed to "' + name + '" during training.',
stacklevel=2)
loss_functions.append(losses.get(loss.get(name)))
elif isinstance(loss, list):
if len(loss) != len(self.outputs):
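The `stacklevel=2` makes the missing-loss warning point at the caller's `compile()` line rather than at `training.py` internals. A minimal multi-output sketch that triggers it (layer names are invented for illustration):

from tensorflow.contrib.keras.python.keras.layers import Dense, Input
from tensorflow.contrib.keras.python.keras.models import Model

x = Input(shape=(4,))
model = Model(inputs=x, outputs=[Dense(1, name='main')(x),
                                 Dense(1, name='aux')(x)])
# 'aux' has no entry in the loss dict, so the warning fires at this line.
model.compile(optimizer='sgd', loss={'main': 'mse'})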
@@ -1202,7 +1203,7 @@ def _predict_loop(self, f, ins, batch_size=32, verbose=0):
if batch_index == 0:
for batch_out in batch_outs:
shape = (samples,) + batch_out.shape[1:]
outs.append(np.zeros(shape, dtype=K.floatx()))
outs.append(np.zeros(shape, dtype=batch_out.dtype))

for i, batch_out in enumerate(batch_outs):
outs[i][batch_start:batch_end] = batch_out
@@ -1718,7 +1719,7 @@ def fit_generator(self,
- a tuple (inputs, targets, sample_weights).
All arrays should contain the same number of samples.
The generator is expected to loop over its data
indefinitely. An epoch finishes when `samples_per_epoch`
indefinitely. An epoch finishes when `steps_per_epoch`
samples have been seen by the model.
steps_per_epoch: Total number of steps (batches of samples)
to yield from `generator` before declaring one epoch
Expand Down Expand Up @@ -1767,7 +1768,7 @@ def generate_arrays_from_file(path):
f.close()
model.fit_generator(generate_arrays_from_file('/my_file.txt'),
samples_per_epoch=10000, epochs=10)
steps_per_epoch=10000, epochs=10)
```
Raises:
@@ -2028,7 +2029,8 @@ def predict_generator(self,
steps,
max_q_size=10,
workers=1,
pickle_safe=False):
pickle_safe=False,
verbose=0):
"""Generates predictions for the input samples from a data generator.
The generator should return the same kind of data as accepted by
@@ -2048,6 +2050,7 @@ def predict_generator(self,
non picklable arguments to the generator
as they can't be passed
easily to children processes.
verbose: verbosity mode, 0 or 1.
Returns:
Numpy array(s) of predictions.
@@ -2067,6 +2070,9 @@ def predict_generator(self,
enqueuer = GeneratorEnqueuer(generator, pickle_safe=pickle_safe)
enqueuer.start(workers=workers, max_q_size=max_q_size)

if verbose == 1:
progbar = Progbar(target=steps)

while steps_done < steps:
generator_output = None
while enqueuer.is_running():
Expand Down Expand Up @@ -2103,6 +2109,8 @@ def predict_generator(self,
for i, out in enumerate(outs):
all_outs[i].append(out)
steps_done += 1
if verbose == 1:
progbar.update(steps_done)

finally:
if enqueuer is not None:
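With the new flag, `predict_generator(..., verbose=1)` drives a `Progbar` across `steps`. An end-to-end sketch (the model and generator below are invented for illustration, not part of the commit):

import numpy as np
from tensorflow.contrib.keras.python.keras.layers import Dense, Input
from tensorflow.contrib.keras.python.keras.models import Model

inp = Input(shape=(4,))
model = Model(inputs=inp, outputs=Dense(1)(inp))
data = np.random.rand(64, 4).astype('float32')

def batch_generator(x, batch_size=8):
  while True:  # predict_generator expects the generator to loop indefinitely
    for i in range(0, len(x), batch_size):
      yield x[i:i + batch_size]

preds = model.predict_generator(batch_generator(data), steps=8, verbose=1)
print(preds.shape)  # (64, 1)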