This notebook documents my learning experience with style transfer AI and neural networks.
The sources can be found at the bottom of this notebook.
Style transfer is the representation of a given image using another image's art style as a reference. Here is an example of my cat, stylized by an existing style transfer service.
Inputs:
Result:
In the following chapters I will attempt to recreate and improve upon this example using a neural style transfer network built with TensorFlow.
import os
import tensorflow as tf
# Load compressed models from tensorflow_hub
os.environ['TFHUB_MODEL_LOAD_FORMAT'] = 'COMPRESSED'
#To display images in the notebook
import IPython.display as display
import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.rcParams['figure.figsize'] = (12, 12)
mpl.rcParams['axes.grid'] = False
import numpy as np
import PIL.Image
import time
import functools
# Create an image from a tensor
def tensor_to_image(tensor):
    tensor = tensor*255
    tensor = np.array(tensor, dtype=np.uint8)
    if np.ndim(tensor) > 3:
        assert tensor.shape[0] == 1
        tensor = tensor[0]
    return PIL.Image.fromarray(tensor)
# Define a function to load an image and limit its maximum dimension to 512 pixels.
def load_img(path_to_img):
    max_dim = 512
    img = tf.io.read_file(path_to_img)
    img = tf.image.decode_image(img, channels=3)
    img = tf.image.convert_image_dtype(img, tf.float32)

    shape = tf.cast(tf.shape(img)[:-1], tf.float32)
    long_dim = max(shape)
    scale = max_dim / long_dim
    new_shape = tf.cast(shape * scale, tf.int32)

    img = tf.image.resize(img, new_shape)
    img = img[tf.newaxis, :]
    return img
# Display image in the notebook
def imshow(image, title=None):
    if len(image.shape) > 3:
        image = tf.squeeze(image, axis=0)

    plt.imshow(image)
    if title:
        plt.title(title)
Style transfer requires two neural networks: a trained feature extractor (also known as an image classifier) and a transfer network.
The guide I'm following recommends using the VGG19 model for feature extraction. VGG19 is a 19-layer image classification model trained on millions of images from the ImageNet dataset.
I will load the model and show the result of classifying this image:
If the pre-trained classifier model is working correctly, the output will be a list of objects recognized in the image.
content_image = load_img('assets/cat_input.jpg')
x = tf.keras.applications.vgg19.preprocess_input(content_image*255)
x = tf.image.resize(x, (224, 224)) # VGG19 requires images to be 224x224
vgg = tf.keras.applications.VGG19(include_top=True, weights='imagenet')
prediction_probabilities = vgg(x)
prediction_probabilities.shape
predicted_top_5 = tf.keras.applications.vgg19.decode_predictions(prediction_probabilities.numpy())[0]
[(class_name, prob) for (number, class_name, prob) in predicted_top_5]
[('hamper', 0.1864334), ('shopping_basket', 0.041492), ('carton', 0.035847683), ('cradle', 0.033675622), ('sleeping_bag', 0.032497372)]
As I previously mentioned, the VGG19 model consists of 19 layers. These layers represent basic and composite features of the image in the following manner:
The first few layers represent basic features like curves, edges, and textures.
The deeper you look in the network, the more complex the features the layers represent become, like eyes or ears.
Our model has the following layers:
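Below is a minimal sketch that prints these layer names; it loads VGG19 without the classification head, since only the convolutional blocks matter when choosing style and content layers.
# Print the names of the layers available in VGG19 (without the classification head)
vgg = tf.keras.applications.VGG19(include_top=False, weights='imagenet')
for layer in vgg.layers:
    print(layer.name)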
Since all these layers represent different features of the image, we can define which should be used for style and which for content. This allows the network to extract only the defining features of the image.
total_variation_weight=200 # 100
learning_rate=0.02
beta_1=0.99
epsilon=1e-2
# number of training steps
epochs = 20
steps_per_epoch = 10
style_image = load_img('assets/style_input.jpg')
content_layers = ['block5_conv2']
style_layers = ['block1_conv1',
'block2_conv1',
'block3_conv1',
'block4_conv1',
'block5_conv1']
num_content_layers = len(content_layers)
num_style_layers = len(style_layers)
style_weight=1e-2
content_weight=1e4
Our pre-trained classifier model defines the input and output layers for us, so all we need to do is supply them to a keras.Model. Here we define a function which creates the Keras model using only the layers we specify.
We can then use this model to get the values of those layers for a specific image.
def vgg_layers(layer_names):
    # Load our model. Load pretrained VGG, trained on ImageNet data
    vgg = tf.keras.applications.VGG19(include_top=False, weights='imagenet')
    vgg.trainable = False

    outputs = [vgg.get_layer(name).output for name in layer_names]
    model = tf.keras.Model([vgg.input], outputs)
    return model
style_extractor = vgg_layers(style_layers)
style_outputs = style_extractor(style_image*255)
#Look at the statistics of each layer's output
for name, output in zip(style_layers, style_outputs):
    print(name)
    print("  shape: ", output.numpy().shape)
    print("  min: ", output.numpy().min())
    print("  max: ", output.numpy().max())
    print("  mean: ", output.numpy().mean())
    print()
block1_conv1
  shape:  (1, 462, 512, 64)
  min:  0.0
  max:  612.49725
  mean:  29.172327

block2_conv1
  shape:  (1, 231, 256, 128)
  min:  0.0
  max:  3813.098
  mean:  156.90674

block3_conv1
  shape:  (1, 115, 128, 256)
  min:  0.0
  max:  7351.9146
  mean:  199.74011

block4_conv1
  shape:  (1, 57, 64, 512)
  min:  0.0
  max:  17799.854
  mean:  658.20306

block5_conv1
  shape:  (1, 28, 32, 512)
  min:  0.0
  max:  2552.0264
  mean:  48.688652
These values can be used to calculate a Gram Matrix for each layer, which can represent the layer's style.
We use the following function for calculating a Gram Matrix:
$$G^l_{cd} = \frac{\sum_{ij} F^l_{ijc}(x)F^l_{ijd}(x)}{IJ}$$

which can be implemented concisely using the tf.linalg.einsum function:
def gram_matrix(input_tensor):
    result = tf.linalg.einsum('bijc,bijd->bcd', input_tensor, input_tensor)
    input_shape = tf.shape(input_tensor)
    num_locations = tf.cast(input_shape[1]*input_shape[2], tf.float32)
    return result/(num_locations)
This class will build a model that returns the style and content tensors.
class StyleContentModel(tf.keras.models.Model):
    def __init__(self, style_layers, content_layers):
        super(StyleContentModel, self).__init__()
        self.vgg = vgg_layers(style_layers + content_layers)
        self.style_layers = style_layers
        self.content_layers = content_layers
        self.num_style_layers = len(style_layers)
        self.vgg.trainable = False

    def call(self, inputs):
        "Expects float input in [0,1]"
        inputs = inputs*255.0
        preprocessed_input = tf.keras.applications.vgg19.preprocess_input(inputs)
        outputs = self.vgg(preprocessed_input)
        style_outputs, content_outputs = (outputs[:self.num_style_layers],
                                          outputs[self.num_style_layers:])

        style_outputs = [gram_matrix(style_output)
                         for style_output in style_outputs]

        content_dict = {content_name: value
                        for content_name, value
                        in zip(self.content_layers, content_outputs)}

        style_dict = {style_name: value
                      for style_name, value
                      in zip(self.style_layers, style_outputs)}

        return {'content': content_dict, 'style': style_dict}
When called on an image, this model returns the Gram matrices (style) of the style_layers and the content of the content_layers:
extractor = StyleContentModel(style_layers, content_layers)
results = extractor(tf.constant(content_image))
print('Styles:')
for name, output in sorted(results['style'].items()):
    print("  ", name)
    print("    shape: ", output.numpy().shape)
    print("    min: ", output.numpy().min())
    print("    max: ", output.numpy().max())
    print("    mean: ", output.numpy().mean())
    print()

print("Contents:")
for name, output in sorted(results['content'].items()):
    print("  ", name)
    print("    shape: ", output.numpy().shape)
    print("    min: ", output.numpy().min())
    print("    max: ", output.numpy().max())
    print("    mean: ", output.numpy().mean())
Styles:
  block1_conv1
    shape:  (1, 64, 64)
    min:  0.0063962084
    max:  19508.418
    mean:  332.7687

  block2_conv1
    shape:  (1, 128, 128)
    min:  0.0
    max:  48189.707
    mean:  8970.164

  block3_conv1
    shape:  (1, 256, 256)
    min:  0.0
    max:  193460.28
    mean:  8509.669

  block4_conv1
    shape:  (1, 512, 512)
    min:  0.0
    max:  2394980.8
    mean:  129696.58

  block5_conv1
    shape:  (1, 512, 512)
    min:  0.0
    max:  76117.125
    mean:  1061.7584

Contents:
  block5_conv2
    shape:  (1, 32, 23, 512)
    min:  0.0
    max:  1291.1721
    mean:  13.581427
We now have an extractor that can return the style and content data of an image separately. With these, we can merge the two onto the original image through a gradient descent optimization, which tries to minimize the differences between the generated image and the targets. To do this, we calculate the mean squared error of the image's outputs relative to the style and content targets, then take the weighted sum of these losses.
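Written out as a formula, this is roughly what the style_content_loss function defined further below computes, with $w_{style}$, $w_{content}$, $N_{style}$, and $N_{content}$ corresponding to style_weight, content_weight, num_style_layers, and num_content_layers from the Setup section:
$$\mathcal{L} = \frac{w_{style}}{N_{style}}\sum_{l}\overline{\left(G^l(x) - G^l(x_{style})\right)^2} + \frac{w_{content}}{N_{content}}\sum_{l}\overline{\left(F^l(x) - F^l(x_{content})\right)^2}$$
where $G^l$ is the Gram matrix defined earlier, $F^l$ is the raw layer output, and the bar denotes the mean over all entries.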
style_targets = extractor(style_image)['style']
content_targets = extractor(content_image)['content']
# The image we will be working on
image = tf.Variable(content_image)
# Since this is a float image, define a function to keep the pixel values between 0 and 1
def clip_0_1(image):
    return tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
# For now we are using the Adam optimiser, but LBFGS supposedly has better results.
opt = tf.keras.optimizers.Adam(learning_rate=learning_rate, beta_1=beta_1, epsilon=epsilon)
To optimize this, use a weighted combination of the two losses to get the total loss:
def style_content_loss(outputs):
    style_outputs = outputs['style']
    content_outputs = outputs['content']

    style_loss = tf.add_n([tf.reduce_mean((style_outputs[name]-style_targets[name])**2)
                           for name in style_outputs.keys()])
    style_loss *= style_weight / num_style_layers

    content_loss = tf.add_n([tf.reduce_mean((content_outputs[name]-content_targets[name])**2)
                             for name in content_outputs.keys()])
    content_loss *= content_weight / num_content_layers

    loss = style_loss + content_loss
    return {"style_loss": style_loss, "content_loss": content_loss, "total_loss": loss}
Use tf.GradientTape to update the image.
@tf.function()
def train_step(image):
    with tf.GradientTape() as tape:
        outputs = extractor(image)
        losses = style_content_loss(outputs)
        loss = losses["total_loss"]

    grad = tape.gradient(loss, image)
    opt.apply_gradients([(grad, image)])
    image.assign(clip_0_1(image))
    return {"outputs": outputs, "losses": losses}
Now run a few steps to test:
style_diffs = []
content_diffs = []
for i in range(10):
    diffs = train_step(image)["losses"]
    style_diffs.append(diffs["style_loss"])
    content_diffs.append(diffs["content_loss"])
plt.plot(style_diffs, label="Style Loss")
plt.plot(content_diffs, label="Content Loss")
plt.legend(loc="upper left")
plt.show()
tensor_to_image(image)
One downside to this basic implementation is that it produces a lot of high-frequency artifacts.
We can decrease these using an explicit regularization term on the high-frequency components of the image. This is essentially an edge-detection filter applied directly to the image.
In style transfer, this is often called the total variation loss:
def high_pass_x_y(image):
    x_var = image[:, :, 1:, :] - image[:, :, :-1, :]
    y_var = image[:, 1:, :, :] - image[:, :-1, :, :]
    return x_var, y_var
x_deltas, y_deltas = high_pass_x_y(content_image)
noise_level = 2
plt.figure(figsize=(14, 10))
plt.subplot(2, 2, 1)
imshow(clip_0_1(noise_level*y_deltas+0.5), "Horizontal Deltas: Original")
plt.subplot(2, 2, 2)
imshow(clip_0_1(noise_level*x_deltas+0.5), "Vertical Deltas: Original")
x_deltas, y_deltas = high_pass_x_y(image)
plt.subplot(2, 2, 3)
imshow(clip_0_1(noise_level*y_deltas+0.5), "Horizontal Deltas: Styled")
plt.subplot(2, 2, 4)
imshow(clip_0_1(noise_level*x_deltas+0.5), "Vertical Deltas: Styled")
The regularization loss associated with this is the sum of the absolute values of these differences:
# Definition for calculating total variation loss:
# def total_variation_loss(image):
#     x_deltas, y_deltas = high_pass_x_y(image)
#     return tf.reduce_sum(tf.abs(x_deltas)) + tf.reduce_sum(tf.abs(y_deltas))
tf.image.total_variation(image).numpy()
array([73856.59], dtype=float32)
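As a quick sanity check (a small sketch reusing the high_pass_x_y helper defined above), the commented-out definition should produce the same value as tf.image.total_variation for our single-image batch:
# Compare the manual definition with TensorFlow's built-in total variation
def total_variation_loss(image):
    x_deltas, y_deltas = high_pass_x_y(image)
    return tf.reduce_sum(tf.abs(x_deltas)) + tf.reduce_sum(tf.abs(y_deltas))

print(total_variation_loss(image).numpy())         # manual sum of absolute deltas
print(tf.image.total_variation(image).numpy()[0])  # built-in, first (only) batch entry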
The variables can be changed in the Setup section.
@tf.function()
def train_step(image):
    with tf.GradientTape() as tape:
        outputs = extractor(image)
        losses = style_content_loss(outputs)
        loss = losses["total_loss"]
        loss += total_variation_weight*tf.image.total_variation(image)

    grad = tape.gradient(loss, image)
    opt.apply_gradients([(grad, image)])
    image.assign(clip_0_1(image))
    return {"outputs": outputs, "losses": losses}
opt = tf.keras.optimizers.Adam(learning_rate=learning_rate, beta_1=beta_1, epsilon=epsilon)
image = tf.Variable(content_image)
style_diffs = []
content_diffs = []
step = 0
for n in range(epochs):
    for m in range(steps_per_epoch):
        step += 1
        diffs = train_step(image)["losses"]
        style_diffs.append(diffs["style_loss"])
        content_diffs.append(diffs["content_loss"])
        print(".", end='', flush=True)
    display.clear_output(wait=True)
    display.display(tensor_to_image(image))
    print("Train step: {}".format(step))
plt.plot(style_diffs, label="Style Loss")
plt.plot(content_diffs, label="Content Loss")
plt.legend(loc="upper left")
plt.show()
Train step: 200
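To keep the final result, the stylized tensor can be written to disk with the tensor_to_image helper defined at the start of the notebook (the output file name here is just an example):
# Save the stylized result to disk (the file name is arbitrary)
tensor_to_image(image).save('assets/stylized_cat.png')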
These are the pages I've used to learn about the subject.