AI

Super Resolution using Convolutional Neural Network
Project Report
Submitted in fulfillment of the requirements of

Artificial Intelligence Lab
Submitted by : Submitted to:

Jagmeet Singh(1606700) Prof. Diana Nagpal
Harshveer Sekhon(1606693)
Department of Computer Science and Engineering

Guru Nanak Dev Engineering College
Ludhiana, 141006
Super Resolution using Convolutional Neural Network
The SRCNN is a deep convolutional neural network that learns an end-to-end mapping from
low-resolution to high-resolution images. As a result, we can use it to improve the image quality of
low-resolution images. This approach uses an external example-based strategy that learns the
mapping between low- and high-resolution patches from external datasets. This project aims at
single-image super resolution (SR), which is a classical problem in computer vision. In
computer vision, super resolution is a group of techniques that reconstruct a high-resolution (HR)
image from a low-resolution (LR) image. It has direct applications in many areas,
from satellite imaging and medical imaging to face recognition. To evaluate the performance of
this network, we will use three image quality metrics: peak signal-to-noise ratio (PSNR),
mean squared error (MSE), and the structural similarity (SSIM) index.
Main.py
from model import Model
import numpy as np
import tensorflow as tf
import pprint
import os
# Command-line configuration declared through TensorFlow 1.x `tf.app.flags`.
# Every DEFINE_* call takes (flag name, default value, help text).
flags = tf.app.flags
flags.DEFINE_string("arch", "FSRCNN", "Model name [FSRCNN]")
flags.DEFINE_boolean("fast", False, "Use the fast model (FSRCNN-s) [False]")
flags.DEFINE_integer("epoch", 10, "Number of epochs [10]")
flags.DEFINE_integer("batch_size", 32, "The size of batch images [32]")
flags.DEFINE_float("learning_rate", 1e-4, "The learning rate of the adam optimizer [1e-4]")
flags.DEFINE_integer("scale", 2, "The size of scale factor for preprocessing input image [2]")
flags.DEFINE_integer("radius", 1, "Max radius of the deconvolution input tensor [1]")
flags.DEFINE_string("checkpoint_dir", "checkpoint",
                    "Name of checkpoint directory [checkpoint]")
flags.DEFINE_string("output_dir", "result", "Name of test output directory [result]")
flags.DEFINE_string("data_dir", "Train", "Name of data directory to train on [Train]")
flags.DEFINE_boolean("train", True, "True for training, false for testing [True]")
flags.DEFINE_integer("threads", 1, "Number of processes to pre-process data with [1]")
flags.DEFINE_boolean("distort", False,
                     "Distort some images with JPEG compression artifacts after downscaling [False]")
flags.DEFINE_boolean("params", False, "Save weight and bias parameters [False]")

# Parsed flag values, shared with main() below.
FLAGS = flags.FLAGS
# Pretty-printer used to dump the active configuration at start-up.
pp = pprint.PrettyPrinter()
def main(_):
    """Program entry point, invoked by ``tf.app.run()``.

    Prints the active flags, prefixes the checkpoint directory with ``fast_``
    when the fast model is selected, creates the checkpoint and output
    directories if they are missing, then builds and runs the model inside a
    TensorFlow session.

    Args:
      _: positional argument supplied by ``tf.app.run()``; unused.
    """
    pp.pprint(flags.FLAGS.__flags)

    if FLAGS.fast:
        FLAGS.checkpoint_dir = 'fast_{}'.format(FLAGS.checkpoint_dir)

    for directory in (FLAGS.checkpoint_dir, FLAGS.output_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    with tf.Session() as sess:
        model = Model(sess, config=FLAGS)
        model.run()


if __name__ == '__main__':
    tf.app.run()
FSRCNN.py
from utils import tf_ssim
class Model(object):
    """FSRCNN-style super-resolution network expressed as TensorFlow 1.x graph ops.

    The instance copies its hyper-parameters and input placeholder from
    ``config`` and exposes ``model()`` (graph construction) and ``loss()``.
    """

    def __init__(self, config):
        """Copy the settings needed to build the graph.

        Args:
          config: object providing ``scale``, ``radius``, ``padding``,
            ``images``, ``batch``, ``image_size`` and ``label_size``.
        """
        self.name = "FSRCNN"
        # Different model layer counts and filter sizes for FSRCNN vs FSRCNN-s (fast),
        # (d, s, m) in paper. model() unpacks the list as (d, s, m, r).
        model_params = [32, 0, 4, 1]
        self.model_params = model_params
        self.scale = config.scale
        self.radius = config.radius
        self.padding = config.padding
        self.images = config.images
        self.batch = config.batch
        # The VALID feature-extraction convolution consumes `padding` pixels.
        self.image_size = config.image_size - self.padding
        self.label_size = config.label_size

    def model(self):
        """Build the network on ``self.images`` and return the output tensor.

        Stages: feature extraction, optional 1x1 shrinking (only when s > 0),
        ``r`` weight-sharing repetitions of an ``m``-layer 3x3 mapping block
        with a residual connection to the features, optional 1x1 expanding
        (only when s > 0), and a sub-pixel convolution whose channels are
        rearranged by ``tf.depth_to_space`` to upscale by ``self.scale``.

        NOTE(review): in the listing this was recovered from, the
        ``tf.get_variable`` calls for 'w2', the mapping weights and the
        expanding weights were cut off after ``shape=...``, and the
        convolution following the last mapping layer was missing. They are
        completed here by analogy with the intact layers (the default
        ``tf.variance_scaling_initializer()`` used for 'w1'; conv2d + bias_add
        as in every other layer). Confirm against the original implementation
        before loading existing checkpoints.
        """
        d, s, m, r = self.model_params
        unit_strides = [1, 1, 1, 1]

        # Feature Extraction
        size = self.padding + 1
        weights = tf.get_variable('w1', shape=[size, size, 1, d],
                                  initializer=tf.variance_scaling_initializer())
        biases = tf.get_variable('b1', initializer=tf.zeros([d]))
        features = tf.nn.conv2d(self.images, weights, strides=unit_strides, padding='VALID',
                                data_format='NHWC')
        features = tf.nn.bias_add(features, biases, data_format='NHWC')

        # Shrinking
        if self.model_params[1] > 0:
            features = self.prelu(features, 1)
            weights = tf.get_variable('w2', shape=[1, 1, d, s],
                                      initializer=tf.variance_scaling_initializer())
            biases = tf.get_variable('b2', initializer=tf.zeros([s]))
            features = tf.nn.conv2d(features, weights, strides=unit_strides, padding='SAME',
                                    data_format='NHWC')
            features = tf.nn.bias_add(features, biases, data_format='NHWC')
        else:
            # Without a shrinking layer the mapping block works on all d channels.
            s = d

        conv = features
        # Mapping (# mapping layers = m)
        with tf.variable_scope("mapping_block") as scope:
            for ri in range(r):
                for i in range(3, m + 3):
                    weights = tf.get_variable('w{}'.format(i), shape=[3, 3, s, s],
                                              initializer=tf.variance_scaling_initializer())
                    biases = tf.get_variable('b{}'.format(i), initializer=tf.zeros([s]))
                    if i > 3:
                        conv = self.prelu(conv, i)
                    conv = tf.nn.conv2d(conv, weights, strides=unit_strides, padding='SAME',
                                        data_format='NHWC')
                    conv = tf.nn.bias_add(conv, biases, data_format='NHWC')
                    if i == m + 2:
                        # Last mapping layer: 1x1 convolution, then residual add.
                        conv = self.prelu(conv, m + 3)
                        weights = tf.get_variable('w{}'.format(m + 3), shape=[1, 1, s, s],
                                                  initializer=tf.variance_scaling_initializer())
                        biases = tf.get_variable('b{}'.format(m + 3), initializer=tf.zeros([s]))
                        conv = tf.nn.conv2d(conv, weights, strides=unit_strides, padding='SAME',
                                            data_format='NHWC')
                        conv = tf.nn.bias_add(conv, biases, data_format='NHWC')
                        conv = tf.add(conv, features)
                # Later repetitions fetch the variables created by the first one.
                scope.reuse_variables()
        # Created outside the scope: 'alpha2' does not exist inside the reusing scope.
        conv = self.prelu(conv, 2)

        # Expanding
        if self.model_params[1] > 0:
            expand_weights = tf.get_variable('w{}'.format(m + 4), shape=[1, 1, s, d],
                                             initializer=tf.variance_scaling_initializer())
            expand_biases = tf.get_variable('b{}'.format(m + 4), initializer=tf.zeros([d]))
            conv = tf.nn.conv2d(conv, expand_weights, strides=unit_strides, padding='SAME',
                                data_format='NHWC')
            conv = tf.nn.bias_add(conv, expand_biases, data_format='NHWC')
            # NOTE(review): placement of this activation inside the branch is
            # inferred (the flattened listing does not show nesting) -- confirm.
            conv = self.prelu(conv, m + 4)

        # Sub-pixel convolution
        size = self.radius * 2 + 1
        deconv_weights = tf.get_variable('deconv_w', shape=[size, size, d, self.scale**2],
                                         initializer=tf.variance_scaling_initializer(scale=0.01))
        deconv_biases = tf.get_variable('deconv_b', initializer=tf.zeros([self.scale**2]))
        deconv = tf.nn.conv2d(conv, deconv_weights, strides=unit_strides, padding='SAME',
                              data_format='NHWC')
        deconv = tf.nn.bias_add(deconv, deconv_biases, data_format='NHWC')
        deconv = tf.depth_to_space(deconv, self.scale, name='pixel_shuffle',
                                   data_format='NHWC')
        return deconv

    def prelu(self, _x, i):
        """
        PreLU tensorflow implementation

        Creates (or reuses) one slope per channel of ``_x`` in the variable
        ``alpha<i>``, initialised to 0.2, and returns
        ``relu(_x) - alpha * relu(-_x)``.
        """
        alphas = tf.get_variable('alpha{}'.format(i), _x.get_shape()[-1],
                                 initializer=tf.constant_initializer(0.2), dtype=tf.float32)
        return tf.nn.relu(_x) - alphas * tf.nn.relu(-_x)

    def loss(self, Y, X):
        """Return the training loss between tensors ``Y`` and ``X``.

        Sum of two absolute-difference terms: one on the Sobel edge maps of
        both tensors, and one (weighted by 2.0) on the tensors themselves
        after multiplying each by ``1 - M``, where ``M`` is the Sobel gradient
        magnitude of ``Y``.
        """
        dY = tf.image.sobel_edges(Y)
        dX = tf.image.sobel_edges(X)
        M = tf.sqrt(tf.square(dY[:, :, :, :, 0]) + tf.square(dY[:, :, :, :, 1]))
        edge_term = tf.losses.absolute_difference(dY, dX)
        flat_term = tf.losses.absolute_difference((1.0 - M) * Y, (1.0 - M) * X, weights=2.0)
        return edge_term + flat_term
CSFM.py

self.name = "CSFM"
self.model_params = [8, 2, 4]
self.labels = config.labels
self.image_size = config.image_size - self.padding
def model(self):
    """Build the CSFM graph on ``self.images`` and return the output tensor.

    ``self.model_params`` is unpacked as (d, m, b): d feature channels,
    m modules (variable scopes ``FMM1..FMMm``) and b attention blocks per
    module. Each block widens, shrinks and embeds the features, derives a
    spatial map (``sa``) and a channel map (``ca``, from the per-channel
    variance), and adds the sigmoid-gated result back onto the running
    tensor. Module outputs are fused with all earlier outputs through the
    ``shortcuts`` concatenation. The result is upscaled by ``self.scale``
    with a sub-pixel convolution and reduced to a single channel.

    NOTE(review): nesting was reconstructed from a listing without
    indentation; loop membership follows from the scope names (per-block
    scopes use ``bi``, per-module scopes use ``i``). Whether the 'final'
    convolution belongs inside the "upscaling" scope could not be
    determined and only affects variable names -- confirm against the
    original before loading checkpoints.
    """
    d, m, b = self.model_params
    conv2d = tf.contrib.layers.conv2d

    # NOTE(review): `size` was not defined anywhere in the listing; the FSRCNN
    # model in this project uses padding + 1 for the same VALID feature
    # convolution, so that value is assumed here -- confirm.
    size = self.padding + 1
    features = conv2d(self.images, d, size, 1, 'VALID', 'NHWC',
                      activation_fn=None, scope='features')
    conv = conv2d(features, d, 3, 1, 'SAME', 'NHWC', activation_fn=None,
                  scope='conv1')
    shortcuts = conv

    for i in range(1, m + 1):
        with tf.variable_scope("FMM{}".format(i)) as scope:
            for bi in range(1, b + 1):
                res = conv2d(conv, d * 6, 1, 1, 'SAME', 'NHWC',
                             activation_fn=tf.nn.leaky_relu, scope='widen{}'.format(bi))
                res = conv2d(res, d, 1, 1, 'SAME', 'NHWC', activation_fn=None,
                             scope='shrink{}'.format(bi))
                res = conv2d(res, d, 3, 1, 'SAME', 'NHWC', activation_fn=None,
                             scope='embedding{}'.format(bi))
                # Spatial map: 3x3 separable convolution with num_outputs=None.
                sa = tf.contrib.layers.separable_conv2d(res, None, 3, 1, 1, 'SAME', 'NHWC',
                                                        activation_fn=None,
                                                        scope='sa{}'.format(bi))
                # Channel map input: per-channel variance, E[x^2] - E[x]^2 over H and W.
                ca = tf.reduce_mean(tf.square(res), [1, 2], True) \
                    - tf.square(tf.reduce_mean(res, [1, 2], True))
                ca = conv2d(ca, max(d // 16, 4), 1, 1, 'SAME', 'NHWC',
                            activation_fn=tf.nn.leaky_relu, scope='ca_shrink{}'.format(bi))
                ca = conv2d(ca, d, 1, 1, 'SAME', 'NHWC', activation_fn=None,
                            scope='ca{}'.format(bi))
                gate = tf.sigmoid(tf.add(sa, ca))
                conv = tf.add(conv, tf.add(res, tf.multiply(res, gate)))

            # Fuse this module's output with everything collected so far.
            conv = tf.concat([conv, shortcuts], -1)
            conv = conv2d(conv, d, 1, 1, 'SAME', 'NHWC', activation_fn=None,
                          scope='GF{}'.format(i))
            shortcuts = tf.concat([conv, shortcuts], -1)

    conv = conv2d(conv, d, 3, 1, 'SAME', 'NHWC', activation_fn=None,
                  scope='res')
    conv = tf.add(conv, features)

    with tf.variable_scope("upscaling"):
        conv = tf.nn.leaky_relu(conv)
        conv = conv2d(conv, d * self.scale**2, 3, 1, 'SAME', 'NHWC',
                      activation_fn=None, scope='sub-pixel_conv')
        conv = tf.depth_to_space(conv, self.scale, name='pixel_shuffle', data_format='NHWC')
        conv = conv2d(conv, 1, 3, 1, 'SAME', 'NHWC', activation_fn=None,
                      scope='final')
    return conv

Utils.py
"""
Scipy version > 0.18 is needed, due to 'mode' option from scipy.misc.imread function
"""
import os
import glob
from math import ceil
import subprocess
import io
from random import randrange, shuffle
from PIL import Image
import numpy as np
from multiprocessing import Pool, Lock, active_children
# Handle to the parsed command-line flags; prepare_data() reads FLAGS.train.
FLAGS = tf.app.flags.FLAGS
# Mode switch read by preprocess(), merge() and the train setup functions:
# True  -> the source image is the label and is downscaled to make the input;
# False -> the source image is the input and is upscaled to make the label
#          (the train setup functions exit immediately in this mode).
downsample = True
def preprocess(path, scale=3, distort=False):
    """
    Preprocess single image file
      (1) Read original image
      (2) Downsample by scale factor (or upsample when `downsample` is False)
      (3) Normalize

    Wand is used when it can be imported; otherwise PIL is used.

    Args:
      path: image file to load.
      scale: integer scale factor between the two resolutions.
      distort: in downsampling mode, when true, about one call in three
        re-encodes the low-resolution image as JPEG (quality 85 or 90).

    Returns:
      (input_, label_): single-channel low-resolution input and
      high-resolution label, as arrays divided by 255.
    """
    try:
        # Imported under an alias: binding the name `Image` inside this function
        # would shadow the module-level PIL `Image` and make the fallback below
        # fail with UnboundLocalError.
        from wand.image import Image as WandImage
    except ImportError:
        input_, label_ = _preprocess_pil(path, scale, distort)
    else:
        input_, label_ = _preprocess_wand(WandImage, path, scale, distort)
    return input_ / 255, label_ / 255


def _preprocess_pil(path, scale, distort):
    """PIL implementation of preprocess(); returns uint8 (input_, label_)."""
    image = Image.open(path).convert('L')
    (width, height) = image.size

    if not downsample:
        # The source is the input; its bicubic upscale is the label.
        input_ = np.frombuffer(image.tobytes(), dtype=np.uint8).reshape(height, width)
        scaled_image = image.resize((width * scale, height * scale), Image.BICUBIC)
        (width, height) = scaled_image.size
        label_ = np.frombuffer(scaled_image.tobytes(), dtype=np.uint8).reshape(height, width)
        return input_, label_

    # Crop so that both dimensions are exact multiples of the scale factor.
    image = image.crop((0, 0, width - width % scale, height - height % scale))
    (width, height) = image.size
    label_ = np.frombuffer(image.tobytes(), dtype=np.uint8).reshape((height, width))

    (new_width, new_height) = width // scale, height // scale
    scaled_image = image.resize((new_width, new_height), Image.BICUBIC)
    image.close()

    if distort and randrange(3) == 0:
        # Round-trip through an in-memory JPEG to add compression artifacts.
        buf = io.BytesIO()
        scaled_image.convert('RGB').save(buf, "JPEG", quality=randrange(85, 95, 5))
        buf.seek(0)
        scaled_image = Image.open(buf).convert('L')

    input_ = np.frombuffer(scaled_image.tobytes(),
                           dtype=np.uint8).reshape((new_height, new_width))
    return input_, label_


def _wand_luma(img):
    """Return channel 0 of the Wand image's 'YCbCr' blob as a (height, width) uint8 array."""
    blob = img.make_blob('YCbCr')
    return np.frombuffer(blob, dtype=np.uint8).reshape(img.height, img.width, 3)[:, :, 0]


def _preprocess_wand(wand_image_cls, path, scale, distort):
    """Wand implementation of preprocess(); returns uint8 (input_, label_)."""
    with wand_image_cls(filename=path) as img:
        img.alpha_channel = False
        img.transform_colorspace("ycbcr")

        if not downsample:
            input_ = _wand_luma(img)
            img.resize(width=img.width * scale, height=img.height * scale, filter="catrom")
            label_ = _wand_luma(img)
            return input_, label_

        # Crop so that both dimensions are exact multiples of the scale factor.
        img.crop(width=img.width - img.width % scale,
                 height=img.height - img.height % scale)
        label_ = _wand_luma(img)
        img.resize(width=img.width // scale, height=img.height // scale,
                   filter="lanczos2", blur=1.0)

        if distort and randrange(3) == 0:
            img.compression_quality = randrange(85, 95, 5)
            img.transform_colorspace("rgb")
            jpeg_bin = img.make_blob('jpeg')
            # Decode the JPEG again in its own context so the image is released.
            with wand_image_cls(blob=jpeg_bin) as reloaded:
                input_ = _wand_luma(reloaded)
        else:
            input_ = _wand_luma(img)
    return input_, label_
def prepare_data(sess, dataset):
    """
    Collect the image paths of a dataset directory under the current working directory.

    Args:
      sess: unused; kept so existing callers keep working.
      dataset: choose train dataset or test dataset
        For train dataset, output data would be ['.../t1.bmp', '.../t2.bmp', ..., '.../t99.bmp']

    Returns:
      When FLAGS.train is set: every '*.bmp' and '*.png' path in
      <cwd>/<dataset>, in random order. Otherwise: the sorted '*.bmp' paths
      in <cwd>/<dataset>/Set5.
    """
    if FLAGS.train:
        data_dir = os.path.join(os.getcwd(), dataset)
        data = []
        for pattern in ('*.bmp', '*.png'):
            data.extend(glob.glob(os.path.join(data_dir, pattern)))
        shuffle(data)
    else:
        data_dir = os.path.join(os.getcwd(), dataset, "Set5")
        data = sorted(glob.glob(os.path.join(data_dir, "*.bmp")))
    return data
def modcrop(image, scale=3):
    """
    To scale down and up the original image, first thing to do is to have no remainder while
    scaling operation.

    The modulo of the height (and width) by the scale factor is subtracted from
    the height (and width), and the image is cropped from its top-left corner
    to that size, so no remainder is left after scaling.

    Args:
      image: array whose first two axes are height and width.
      scale: scale factor the cropped height and width must be divisible by.

    Returns:
      The cropped array (a slice of ``image``); any further axes are kept.
    """
    h, w = image.shape[:2]
    h -= h % scale
    w -= w % scale
    return image[:h, :w]
def train_input_worker(args):
    """Cut one training image into aligned input/label patches.

    Args:
      args: tuple ``(image_path, config)`` where ``config`` is the sequence
        ``(image_size, label_size, stride, scale, padding, distort)``.

    Returns:
      ``[inputs, labels]``: two equally long lists of arrays shaped
      (image_size, image_size, 1) and (label_size, label_size, 1).
    """
    image_data, config = args
    image_size, label_size, stride, scale, padding, distort = config

    input_, label_ = preprocess(image_data, scale, distort=distort)
    h, w = input_.shape[:2]

    single_input_sequence, single_label_sequence = [], []
    for x in range(0, h - image_size + 1, stride):
        for y in range(0, w - image_size + 1, stride):
            sub_input = input_[x:x + image_size, y:y + image_size]
            # The label window starts `padding` input pixels inside the input
            # window, expressed in label coordinates (hence * scale).
            top, left = (x + padding) * scale, (y + padding) * scale
            sub_label = label_[top:top + label_size, left:left + label_size]

            single_input_sequence.append(sub_input.reshape([image_size, image_size, 1]))
            single_label_sequence.append(sub_label.reshape([label_size, label_size, 1]))

    return [single_input_sequence, single_label_sequence]
def thread_train_setup(config):
    """
    Spawns |config.threads| worker processes to pre-process the data

    This has not been extensively tested so use at your own risk.
    Also this is technically multiprocessing not threading, I just say thread
    because it's shorter to type.

    Args:
      config: object providing sess, data_dir, threads, image_size, label_size,
        stride, scale, padding and distort.

    Returns:
      (arrdata, arrlabel): arrays stacking every input patch and every label
      patch produced by the workers.
    """
    if downsample == False:
        # Training patches are only generated in downsampling mode.
        import sys
        sys.exit()

    sess = config.sess

    # Load data path
    data = prepare_data(sess, dataset=config.data_dir)

    config_values = [config.image_size, config.label_size, config.stride, config.scale,
                     config.padding // 2, config.distort]
    images_per_thread = len(data) // config.threads

    # Initialize multiprocessing pool with # of processes = config.threads
    pool = Pool(config.threads)
    try:
        # Distribute |images_per_thread| images across each worker process
        workers = []
        for thread in range(config.threads):
            start = thread * images_per_thread
            # The last chunk also takes the remainder so that no image is dropped
            # when len(data) is not a multiple of config.threads.
            if thread == config.threads - 1:
                end = len(data)
            else:
                end = start + images_per_thread
            args_list = [(image_path, config_values) for image_path in data[start:end]]
            workers.append(pool.map_async(train_input_worker, args_list))
        print("{} worker processes created".format(config.threads))
        pool.close()

        results = []
        for i, worker in enumerate(workers):
            print("Waiting for worker process {}".format(i))
            results.extend(worker.get(timeout=240))
            print("Worker process {} done".format(i))
        print("All worker processes done!")
    finally:
        # Also reached on a timeout or worker error, so no process is left behind.
        pool.terminate()
        pool.join()

    sub_input_sequence, sub_label_sequence = [], []
    for single_input_sequence, single_label_sequence in results:
        sub_input_sequence.extend(single_input_sequence)
        sub_label_sequence.extend(single_label_sequence)

    arrdata = np.asarray(sub_input_sequence)
    arrlabel = np.asarray(sub_label_sequence)
    return (arrdata, arrlabel)
def train_input_setup(config):
    """
    Read image files and cut them into training sub-images (single process).

    Args:
      config: object providing sess, data_dir, image_size, label_size, stride,
        scale, padding and distort.

    Returns:
      (arrdata, arrlabel): arrays stacking every input patch, shaped
      (image_size, image_size, 1), and every label patch, shaped
      (label_size, label_size, 1).
    """
    if downsample == False:
        # Training patches are only generated in downsampling mode.
        import sys
        sys.exit()

    sess = config.sess
    image_size, label_size = config.image_size, config.label_size
    stride, scale, padding = config.stride, config.scale, config.padding // 2

    # Load data path
    data = prepare_data(sess, dataset=config.data_dir)

    sub_input_sequence, sub_label_sequence = [], []
    for image_path in data:
        input_, label_ = preprocess(image_path, scale, distort=config.distort)
        h, w = input_.shape[:2]

        for x in range(0, h - image_size + 1, stride):
            for y in range(0, w - image_size + 1, stride):
                sub_input = input_[x:x + image_size, y:y + image_size]
                # The label window starts `padding` input pixels inside the
                # input window, expressed in label coordinates (hence * scale).
                top, left = (x + padding) * scale, (y + padding) * scale
                sub_label = label_[top:top + label_size, left:left + label_size]

                sub_input_sequence.append(sub_input.reshape([image_size, image_size, 1]))
                sub_label_sequence.append(sub_label.reshape([label_size, label_size, 1]))

    arrdata = np.asarray(sub_input_sequence)
    arrlabel = np.asarray(sub_label_sequence)
    # The caller unpacks two values, exactly as with thread_train_setup().
    return (arrdata, arrlabel)
def test_input_setup(config):
    """Load one test image and shape it into network input and label batches.

    The third path returned by ``prepare_data(..., dataset="Test")`` is used.

    Args:
      config: object providing ``sess`` and ``scale``.

    Returns:
      (arrdata, arrlabel): the low-resolution image as a (1, h + 4, w + 4, 1)
      array, reflect-padded by 2 pixels per side, and the label as a
      (1, H, W, 1) array.

    Raises:
      IndexError: fewer than three test images were found.
    """
    sess = config.sess

    # Load data path
    data = prepare_data(sess, dataset="Test")
    if len(data) < 3:
        raise IndexError("test_input_setup needs at least 3 test images, found {}".format(len(data)))

    input_, label_ = preprocess(data[2], config.scale)

    h, w = input_.shape[:2]
    # 2 pixels per side equals padding // 2 for the model's padding of 4.
    arrdata = np.pad(input_.reshape([1, h, w, 1]), ((0, 0), (2, 2), (2, 2), (0, 0)), 'reflect')

    h, w = label_.shape[:2]
    arrlabel = label_.reshape([1, h, w, 1])
    return (arrdata, arrlabel)
def merge(config, Y):
    """
    Merges super-resolved image with chroma components

    Args:
      config: object providing ``sess`` and ``scale``.
      Y: array of shape (1, h, w, 1) holding the super-resolved luma in [0, 1].

    Returns:
      uint8 array of shape (h, w, 3): ``Y`` scaled to 0..255 followed by the
      Cb and Cr planes of the third test image.
    """
    h, w = Y.shape[1], Y.shape[2]
    Y = Y.reshape(h, w, 1) * 255
    Y = Y.round().astype(np.uint8)

    data = prepare_data(config.sess, dataset="Test")
    with Image.open(data[2]) as source_file:
        src = source_file.convert('YCbCr')
    (width, height) = src.size
    if downsample is False:
        src = src.resize((width * config.scale, height * config.scale), Image.BICUBIC)
        (width, height) = src.size

    CbCr = np.frombuffer(src.tobytes(), dtype=np.uint8).reshape(height, width, 3)[:, :, 1:]
    # preprocess() crops the image from its top-left corner to a multiple of the
    # scale, so Y can be smaller than the source; align the chroma planes with it.
    CbCr = CbCr[:h, :w]
    img = np.concatenate((Y, CbCr), axis=-1)
    return img
def save_params(sess, params):
    """Dump every trainable variable to a text file under ``params/``.

    The file is named ``weights<p1>_<p2>_....txt`` after ``params``. Each
    variable is written as ``<name> =`` followed by its values: variables of
    rank below 4 as one flat list; rank-4 variables inside ``[...]`` with one
    line per (axis 0, axis 1, axis 2) index, holding the values along the
    last axis.

    Args:
      sess: session used to evaluate the variables.
      params: iterable whose items are joined with '_' to form the file name.
    """
    param_dir = "params/"
    if not os.path.exists(param_dir):
        os.makedirs(param_dir)

    variables = dict((var.name, sess.run(var)) for var in tf.trainable_variables())

    file_name = param_dir + "weights{}.txt".format('_'.join(str(i) for i in params))
    with open(file_name, 'w') as h:
        for name, weights in variables.items():
            # Variable names look like "w1:0"; drop the ":<index>" suffix.
            h.write("{} =\n".format(name[:name.index(':')]))

            if len(weights.shape) < 4:
                h.write("{}\n\n".format(weights.flatten().tolist()))
                continue

            h.write("[")
            sep = False
            for filter_row in weights:
                for filter_weights in filter_row:
                    for channel_weights in filter_weights:
                        for val in channel_weights:
                            # The separator precedes every value but the first,
                            # so continuation lines start with ", ".
                            if sep:
                                h.write(', ')
                            h.write("{}".format(val))
                            sep = True
                        h.write("\n ")
            h.write("]\n\n")
def array_image_save(array, image_path):
    """
    Converts np array to image and saves it

    Args:
      array: image data interpreted in PIL's 'YCbCr' mode.
      image_path: destination file; the image is converted to RGB before saving.
    """
    # An image created in 'YCbCr' mode is never 'RGB', so always convert.
    image = Image.fromarray(array, 'YCbCr').convert('RGB')
    image.save(image_path)
    print("Saved image: {}".format(image_path))
def _tf_fspecial_gauss(size, sigma):
    """Function to mimic the 'fspecial' gaussian MATLAB function

    Args:
      size: side length of the square kernel.
      sigma: standard deviation of the Gaussian.

    Returns:
      float32 tensor of shape (size, size, 1, 1) whose entries sum to 1.
    """
    x_data, y_data = np.mgrid[-size//2 + 1:size//2 + 1, -size//2 + 1:size//2 + 1]

    # Append two singleton axes so the kernel has the 4-D filter layout
    # expected by tf.nn.conv2d.
    x = tf.constant(x_data[:, :, np.newaxis, np.newaxis], dtype=tf.float32)
    y = tf.constant(y_data[:, :, np.newaxis, np.newaxis], dtype=tf.float32)

    g = tf.exp(-((x**2 + y**2)/(2.0*sigma**2)))
    return g / tf.reduce_sum(g)
def tf_ssim(img1, img2, cs_map=False, mean_metric=True, sigma=1.5):
    """Structural similarity (SSIM) between two single-channel NHWC tensors.

    Args:
      img1, img2: tensors to compare; values are assumed to span a range of 1
        (see ``L`` below).
      cs_map: when true, return only the contrast-structure term.
      mean_metric: when true, reduce the map to its mean.
      sigma: standard deviation of the Gaussian window; the window side is
        ``int(sigma * 3) * 2 + 1``.

    Returns:
      The SSIM (or contrast-structure) map, or its mean if ``mean_metric``.
    """
    size = int(sigma * 3) * 2 + 1
    window = _tf_fspecial_gauss(size, sigma)
    K1 = 0.01
    K2 = 0.03
    L = 1  # depth of image (255 in case the image has a different scale)
    C1 = (K1*L)**2
    C2 = (K2*L)**2

    def local_mean(tensor):
        # Gaussian-weighted local average over each window position.
        return tf.nn.conv2d(tensor, window, strides=[1, 1, 1, 1], padding='VALID',
                            data_format='NHWC')

    mu1 = local_mean(img1)
    mu2 = local_mean(img2)
    mu1_sq = mu1*mu1
    mu2_sq = mu2*mu2
    mu1_mu2 = mu1*mu2
    sigma1_sq = tf.abs(local_mean(img1*img1) - mu1_sq)
    sigma2_sq = tf.abs(local_mean(img2*img2) - mu2_sq)
    sigma12 = local_mean(img1*img2) - mu1_mu2

    if cs_map:
        value = (2.0*sigma12 + C2)/(sigma1_sq + sigma2_sq + C2)
    else:
        value = ((2*mu1_mu2 + C1)*(2*sigma12 + C2))/((mu1_sq + mu2_sq + C1)*
                                                     (sigma1_sq + sigma2_sq + C2))

    if mean_metric:
        value = tf.reduce_mean(value)
    return value
def tf_ms_ssim(img1, img2, sigma=1.5, weights=(0.1, 0.9)):
    """Weighted sum of SSIM values computed at successively halved resolutions.

    Args:
      img1, img2: single-channel NHWC tensors to compare.
      sigma: Gaussian sigma passed to tf_ssim() at every scale.
      weights: one weight per scale; normalised to sum to 1. Any sequence is
        accepted (a list works as before).

    Returns:
      Scalar tensor: the sum over scales of weight * mean SSIM.
    """
    weights = np.asarray(weights, dtype=np.float64)
    weights = weights / np.sum(weights)
    window = _tf_fspecial_gauss(5, 1)

    mssim = []
    last_scale = len(weights) - 1
    for level in range(len(weights)):
        mssim.append(tf_ssim(img1, img2, sigma=sigma))
        if level < last_scale:
            # Blur and subsample by 2 for the next scale; nothing consumes a
            # further reduction after the last scale, so it is not built.
            img1 = tf.nn.conv2d(img1, window, [1, 2, 2, 1], 'VALID')
            img2 = tf.nn.conv2d(img2, window, [1, 2, 2, 1], 'VALID')

    value = tf.reduce_sum(tf.multiply(tf.stack(mssim), weights))
    return value
def bilinear_upsample_weights(factor, channels):
    """
    Create weights matrix for transposed convolution with bilinear filter
    initialization.

    Args:
      factor: integer upsampling factor.
      channels: number of channels; each channel maps only onto itself.

    Returns:
      float32 array of shape (filter_size, filter_size, channels, channels)
      with ``filter_size = 2 * factor - factor % 2``. Entry [:, :, i, i] holds
      the bilinear kernel; every other entry is 0.
    """
    filter_size = 2 * factor - factor % 2
    # Odd-sized kernels are centred on a sample, even-sized ones between two.
    center = factor - (1 if factor % 2 == 1 else 0.5)

    rows, cols = np.ogrid[:filter_size, :filter_size]
    upsample_kernel = (1 - abs(rows - center) / factor) * (1 - abs(cols - center) / factor)

    weights = np.zeros((filter_size, filter_size, channels, channels), dtype=np.float32)
    for i in range(channels):
        weights[:, :, i, i] = upsample_kernel
    return weights
def bicubic_kernel(x, B=1/3., C=1/3.):
    """https://de.wikipedia.org/wiki/Mitchell-Netravali-Filter

    Evaluate the piecewise-cubic kernel with parameters ``B`` and ``C`` at
    ``x``. The kernel is symmetric in ``x`` and is 0 for ``|x| >= 2``.
    """
    ax = abs(x)
    if ax < 1:
        return 1/6. * ((12-9*B-6*C)*ax**3 + ((-18+12*B+6*C)*ax**2 + (6-2*B)))
    if ax < 2:
        return 1/6. * ((-B-6*C)*ax**3 + (6*B+30*C)*ax**2 + (-12*B-48*C)*ax +
                       (8*B+24*C))
    return 0.0
def build_filter(factor, B, C, channels=1):
    """Build a separable 2-D resampling filter from bicubic_kernel().

    Args:
      factor: scale factor; the filter has ``factor * 4`` taps per axis.
      B, C: kernel parameters forwarded to bicubic_kernel().
      channels: number of channels; each channel maps only onto itself.

    Returns:
      float32 array of shape (size, size, channels, channels). Entry
      [:, :, i, i] is the outer product of the normalised 1-D kernel with
      itself; every other entry is 0.
    """
    size = factor * 4
    half = np.floor(size / 2)

    k = np.zeros((size), dtype=np.float32)
    for i in range(size):
        # Tap positions are sample centres, spaced 1/factor apart around 0.
        x = (1 / factor) * (i - half + 0.5)
        k[i] = bicubic_kernel(x, B, C)
    k = k / np.sum(k)
    k = np.outer(k, k)

    weights = np.zeros((size, size, channels, channels), dtype=np.float32)
    for i in range(channels):
        weights[:, :, i, i] = k
    return weights
def bicubic_downsample(x, factor, B=1/3., C=1/3.):
    """Downsample x by a factor of factor, using the filter built by build_filter()

    x: a rank 4 tensor with format NHWC
    factor: downsampling factor (ex: factor=2 means the output size is (h/2, w/2))
    B, C: kernel parameters forwarded to build_filter()
    """
    # using padding calculations from
    # https://www.tensorflow.org/api_guides/python/nn#Convolution
    kernel_size = factor * 4
    padding = kernel_size - factor
    pad_top = padding // 2
    pad_bottom = padding - pad_top
    pad_left = padding // 2
    pad_right = padding - pad_left

    # apply mirror padding
    x = tf.pad(x, [[0, 0], [pad_top, pad_bottom], [pad_left, pad_right], [0, 0]],
               mode='REFLECT')

    # downsampling performed by strided conv
    x = tf.nn.conv2d(x, build_filter(factor, B, C), [1, factor, factor, 1], 'VALID',
                     data_format='NHWC')
    return x
model.py
from utils import (

thread_train_setup,
train_input_setup,
test_input_setup,
save_params,
merge,
array_image_save
)
import time
import os
import importlib
from random import randrange
import numpy as np
import pdb
# Based on http://mmlab.ie.cuhk.edu.hk/projects/FSRCNN.html
class Model(object):
    """Training/testing driver: builds the selected architecture and runs it.

    The architecture module named by ``config.arch`` is imported dynamically;
    it must expose a ``Model`` class with ``name``, ``model_params``,
    ``model()`` and ``loss()``. This object itself is passed to it as config.
    """

    def __init__(self, sess, config):
        """Store the run configuration and build the graph.

        Args:
          sess: TensorFlow session used for training, testing and checkpoints.
          config: flags object providing arch, fast, train, epoch, batch_size,
            learning_rate, threads, distort, params, scale, radius,
            checkpoint_dir, output_dir and data_dir.

        Raises:
          ValueError: ``config.scale`` is not 2, 3 or 4.
        """
        self.sess = sess
        self.arch = config.arch
        self.fast = config.fast
        self.train = config.train
        self.epoch = config.epoch
        self.batch_size = config.batch_size
        self.learning_rate = config.learning_rate
        self.threads = config.threads
        self.distort = config.distort
        self.params = config.params
        # Read below, in init_model() and by the architecture class.
        self.scale = config.scale
        self.radius = config.radius

        self.padding = 4
        self.image_size, self.label_size = self._patch_sizes(self.scale, self.padding)
        self.stride = self.image_size - self.padding

        self.checkpoint_dir = config.checkpoint_dir
        self.output_dir = config.output_dir
        self.data_dir = config.data_dir
        self.init_model()

    @staticmethod
    def _patch_sizes(scale, padding):
        """Return (image_size, label_size) of the training patches for ``scale``.

        Raises ValueError for unsupported scales; a plain list lookup would
        silently map scale 1 onto the x4 sizes through index -1.
        """
        # Different image/label sub-sizes for different scaling factors x2, x3, x4
        scale_factors = {2: (20 + padding, 40), 3: (14 + padding, 42), 4: (12 + padding, 48)}
        if scale not in scale_factors:
            raise ValueError("unsupported scale {!r}; expected 2, 3 or 4".format(scale))
        return scale_factors[scale]

    def init_model(self):
        """Create the placeholders, the network, the loss and the checkpoint saver."""
        if self.train:
            # Training feeds fixed-size patches.
            self.images = tf.placeholder(tf.float32, [None, self.image_size, self.image_size, 1],
                                         name='images')
            self.labels = tf.placeholder(tf.float32, [None, self.label_size, self.label_size, 1],
                                         name='labels')
        else:
            # Testing feeds whole images of arbitrary size.
            self.images = tf.placeholder(tf.float32, [None, None, None, 1], name='images')
            self.labels = tf.placeholder(tf.float32, [None, None, None, 1], name='labels')
        # Batch size differs in training vs testing
        self.batch = tf.placeholder(tf.int32, shape=[], name='batch')

        model = importlib.import_module(self.arch)
        self.model = model.Model(self)
        self.pred = self.model.model()

        # Checkpoints are grouped by architecture name, label size, model
        # parameters and radius.
        model_dir = "%s_%s_%s_%s" % (self.model.name.lower(), self.label_size,
                                     '-'.join(str(i) for i in self.model.model_params),
                                     "r" + str(self.radius))
        self.model_dir = os.path.join(self.checkpoint_dir, model_dir)

        self.loss = self.model.loss(self.labels, self.pred)
        self.saver = tf.train.Saver()

    def run(self):
        """Create the optimizer, restore a checkpoint if any, then dispatch.

        Dispatch order: parameter export when ``self.params`` is set,
        otherwise training when ``self.train`` is set, otherwise testing.
        """
        global_step = tf.Variable(0, trainable=False)
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        # Gradients of variables whose name contains 'deconv' are scaled by a
        # separate factor (currently 1.0).
        grads = [(grad * 1.0, var) if 'deconv' in var.name else (grad, var)
                 for grad, var in optimizer.compute_gradients(self.loss)]
        self.train_op = optimizer.apply_gradients(grads, global_step=global_step)

        tf.global_variables_initializer().run()

        if self.load():
            print(" [*] Load SUCCESS")
        else:
            print(" [!] Load failed...")

        if self.params:
            save_params(self.sess, self.model.model_params)
        elif self.train:
            self.run_train()
        else:
            self.run_test()

    def run_train(self):
        """Prepare the training patches and run ``self.epoch`` epochs.

        Every batch is used for three steps: as is, rotated by a random
        multiple of 90 degrees, and flipped along a random spatial axis.
        Progress is printed every 10 steps and a checkpoint is saved every
        500 steps. At the end, the mean loss of the first fifth of the epochs
        is compared with that of the last fifth.

        Raises:
          ValueError: fewer training patches than ``self.batch_size``.
        """
        start_time = time.time()
        print("Beginning training setup...")
        if self.threads == 1:
            train_data, train_label = train_input_setup(self)
        else:
            train_data, train_label = thread_train_setup(self)
        print("Training setup took {} seconds with {} threads".format(time.time() - start_time,
                                                                      self.threads))

        batch_idxs = len(train_data) // self.batch_size
        if batch_idxs == 0:
            raise ValueError("not enough training patches ({}) for batch size {}".format(
                len(train_data), self.batch_size))

        print("Training...")
        counter = 0
        epoch_losses = []
        for ep in range(self.epoch):
            # Run by batch images
            loss_sum, steps = 0.0, 0
            for idx in range(0, batch_idxs):
                batch_images = train_data[idx * self.batch_size : (idx + 1) * self.batch_size]
                batch_labels = train_label[idx * self.batch_size : (idx + 1) * self.batch_size]

                for exp in range(3):
                    if exp == 0:
                        images = batch_images
                        labels = batch_labels
                    elif exp == 1:
                        k = randrange(3) + 1
                        images = np.rot90(batch_images, k, (1, 2))
                        labels = np.rot90(batch_labels, k, (1, 2))
                    else:
                        k = randrange(2)
                        images = batch_images[:, ::-1] if k == 0 else batch_images[:, :, ::-1]
                        labels = batch_labels[:, ::-1] if k == 0 else batch_labels[:, :, ::-1]

                    counter += 1
                    _, err = self.sess.run([self.train_op, self.loss],
                                           feed_dict={self.images: images, self.labels: labels,
                                                      self.batch: self.batch_size})
                    loss_sum += float(err)
                    steps += 1

                    if counter % 10 == 0:
                        print("Epoch: [%2d], step: [%2d], time: [%4.4f], loss: [%.8f]" \
                              % ((ep+1), counter, time.time() - start_time, err))

                    # Save every 500 steps
                    if counter % 500 == 0:
                        self.save(counter)

            # Mean over the steps actually run (three per batch).
            epoch_losses.append(loss_sum / steps)

        if not epoch_losses:
            return

        # Compare loss of the first 20% and the last 20% epochs. The window is
        # ceil(epoch / 5), so the divisor always equals the number of epochs summed.
        window = max(1, -(-self.epoch // 5))
        start_average = sum(epoch_losses[:window]) / len(epoch_losses[:window])
        end_average = sum(epoch_losses[-window:]) / len(epoch_losses[-window:])
        improved = 100 - (100 * end_average / start_average) if start_average else 0.0
        print("Start Average: [%.6f], End Average: [%.6f], Improved: [%.2f%%]" \
              % (start_average, end_average, improved))

    def run_test(self):
        """Super-resolve the test image, report timing/PSNR/SSIM and save the result.

        The output is written to ``<cwd>/<output_dir>/test_image.png``.
        """
        test_data, test_label = test_input_setup(self)

        print("Testing...")

        start_time = time.time()
        result = np.clip(self.pred.eval({self.images: test_data, self.labels: test_label,
                                         self.batch: 1}),
                         0, 1)
        passed = time.time() - start_time

        img1 = tf.convert_to_tensor(test_label, dtype=tf.float32)
        img2 = tf.convert_to_tensor(result, dtype=tf.float32)
        psnr = self.sess.run(tf.image.psnr(img1, img2, 1))
        ssim = self.sess.run(tf.image.ssim(img1, img2, 1))
        print("Took %.3f seconds, PSNR: %.6f, SSIM: %.6f" % (passed, psnr, ssim))

        result = merge(self, result)
        image_path = os.path.join(os.getcwd(), self.output_dir)
        image_path = os.path.join(image_path, "test_image.png")

        array_image_save(result, image_path)

    def save(self, step):
        """Write a checkpoint for ``step`` into ``self.model_dir`` (created if missing)."""
        model_name = self.model.name + ".model"

        if not os.path.exists(self.model_dir):
            os.makedirs(self.model_dir)

        self.saver.save(self.sess,
                        os.path.join(self.model_dir, model_name),
                        global_step=step)

    def load(self):
        """Restore the latest checkpoint from ``self.model_dir``.

        Returns:
          True if a checkpoint was found and restored, False otherwise.
        """
        print(" [*] Reading checkpoints...")

        ckpt = tf.train.get_checkpoint_state(self.model_dir)
        if not (ckpt and ckpt.model_checkpoint_path):
            return False

        ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
        self.saver.restore(self.sess, os.path.join(self.model_dir, ckpt_name))
        return True
gen.py
import sys
import math
from itertools import islice
# Module-level radius setting.
# NOTE(review): no function visible in this part of the file reads it;
# presumably used further down in main() -- confirm.
radius = 1
def get_line_number(phrase, file_name):
    """Return the 1-based number of the first line of ``file_name`` containing ``phrase``.

    Returns ``False`` when no line contains the phrase.
    """
    with open(file_name) as f:
        matches = (number for number, line in enumerate(f, 1) if phrase in line)
        return next(matches, False)
def read_weights(file_name, ln, size=1):
    """Read ``size`` lines of ``file_name`` that follow line number ``ln``.

    ``ln`` is used as a 0-based start index, so passing the 1-based number
    returned by get_line_number() starts reading on the line after the match.
    Everything up to and including the first '[' and everything from the last
    ']' onwards is removed from each line, and surrounding whitespace is
    stripped.

    Returns:
      List of the cleaned lines.
    """
    content = []
    with open(file_name) as f:
        for line in islice(f, ln, ln + size):
            if '[' in line:
                line = line[line.index('[') + 1:]
            if ']' in line:
                line = line[:line.rindex(']')]
            content.append(line.strip())
    return content
def format_weights(weights, n, length=4):
    """Format ``length`` values of a comma-separated string, starting at index ``n``.

    Leading and trailing commas of ``weights`` are ignored. Each selected
    value is printed with 16 decimal places and the results are joined with
    ',' (no spaces).
    """
    values = weights.strip(",").split(",")
    return ",".join('{:.16f}'.format(float(value)) for value in values[n:n + length])
def base_header(file):
    """Write the directives shared by every generated shader pass.

    Emits ``//!HOOK LUMA`` and a ``//!WHEN`` condition whose threshold is
    built from the module-level ``scale`` (``scale - 1`` followed by ".400").
    """
    file.write('//!HOOK LUMA\n')
    file.write('//!WHEN OUTPUT.w LUMA.w / {0}.400 > OUTPUT.h LUMA.h / {0}.400 > *\n'.format(scale - 1))
def header1(file, n, d):
    """Write the header of one feature-map pass.

    ``n`` is the first output channel of the pass (a multiple of 4) and ``d``
    the number of feature channels; the pass number is
    ``(n // 4) % (d // 4) + 1``. The pass binds LUMA and saves FEATURE<number>.
    """
    index = (n//4)%(d//4) + 1
    base_header(file)
    file.write('//!DESC feature map {}\n'.format(index))
    file.write('//!BIND LUMA\n')
    file.write('//!SAVE FEATURE{}\n'.format(index))
    file.write('//!COMPONENTS 4\n')
def header2(file, d, n, s):
    """Write the header of one shrinking pass.

    Binds all ``d // 4`` FEATURE textures and saves SHRINKED<number>, where
    the pass number is ``(n // 4) % (s // 4) + 1``.
    """
    index = (n//4)%(s//4) + 1
    base_header(file)
    file.write('//!DESC shrinking {}\n'.format(index))
    for i in range(d//4):
        file.write('//!BIND {}{}\n'.format("FEATURE", i + 1))
    file.write('//!SAVE SHRINKED{}\n'.format(index))
def header3(file, r, mi, m, n, s, inp):
    """Write the header of one mapping pass.

    Binds ``s // 4`` textures named ``inp`` and saves one MODEL texture. The
    parity of ``r * m + mi`` selects the texture numbering: on even layers
    inputs use indices 1.. and the output index is offset by 20; on odd layers
    the inputs are offset by 20 and the output is not, so consecutive layers
    alternate between the two index ranges.
    """
    even_layer = (r * m + mi) % 2 == 0
    index = (n//4)%(s//4) + 1

    base_header(file)
    file.write('//!DESC mapping {}_{}\n'.format(mi + 1, index))
    for i in range(s//4):
        file.write('//!BIND {}{}\n'.format(inp, i+1 + (0 if even_layer else 20)))
    file.write('//!SAVE MODEL{}\n'.format(index + (20 if even_layer else 0)))
def header3_1(file, r, mi, m, n, s, inp):
    """Write the header of one sub-band residual pass.

    Binds the MODEL textures (index offset by 20 when ``r * m + mi`` is even,
    matching what header3() saves for that layer) plus one ``inp`` texture,
    and saves RES<number> with number ``(n // 4) % (s // 4) + 1``.

    NOTE(review): the loop over ``i`` was missing from the listing (``i`` was
    undefined); it is restored as ``range(s // 4)`` by analogy with the other
    header functions -- confirm against the original generator.
    """
    even_layer = (r * m + mi) % 2 == 0
    index = (n//4)%(s//4) + 1

    base_header(file)
    file.write('//!DESC sub-band residuals {}\n'.format(index))
    for i in range(s//4):
        file.write('//!BIND MODEL{}\n'.format(i + 1 + (20 if even_layer else 0)))
    file.write('//!BIND {}{}\n'.format(inp, index))
    file.write('//!SAVE RES{}\n'.format(index))
def header4(file, s, m, r, n, d):
    """Write the header of one expanding pass.

    Binds the RES textures and saves EXPANDED<number> with number
    ``(n // 4) % (d // 4) + 1``. ``m`` and ``r`` are accepted but unused.

    NOTE(review): the loop over ``i`` was missing from the listing (``i`` was
    undefined); it is restored as ``range(s // 4)``, the number of RES
    textures header3_1() saves -- confirm against the original generator.
    """
    index = (n//4)%(d//4) + 1
    base_header(file)
    file.write('//!DESC expanding {}\n'.format(index))
    for i in range(s//4):
        file.write('//!BIND RES{}\n'.format(i + 1))
    file.write('//!SAVE EXPANDED{}\n'.format(index))
def header5(file, n, d, inp):
    """Write the header of one sub-pixel convolution pass.

    Binds ``d // 4`` textures named ``inp`` and saves SUBCONV<number>, where
    the number is ``n // comps + 1`` and ``comps`` is the module-level
    component count, also written as the ``//!COMPONENTS`` value.
    """
    index = (n//comps) + 1
    base_header(file)
    file.write('//!DESC sub-pixel convolution {}\n'.format(index))
    for i in range(d//4):
        file.write('//!BIND {}{}\n'.format(inp, i + 1))
    file.write('//!SAVE SUBCONV{}\n'.format(index))
    file.write('//!COMPONENTS {}\n'.format(comps))
def header6(file):
    """Write the header of the final aggregation pass.

    Sets the output width and height to LUMA's size multiplied by the
    module-level ``scale`` and binds all ``scale**2 // comps`` SUBCONV
    textures.
    """
    base_header(file)
    file.write('//!WIDTH LUMA.w {} *\n'.format(scale))
    file.write('//!HEIGHT LUMA.h {} *\n'.format(scale))
    file.write('//!DESC aggregation\n')
    for i in range(scale**2//comps):
        file.write('//!BIND SUBCONV{}\n'.format(i + 1))
def main():
# Entry point: reads a weights text file whose path is the single command-line
# argument and writes GLSL shader passes to a file whose name is derived from
# the input name.
# NOTE(review): this listing has lost its indentation and, at the places
# flagged below, appears to be missing statements -- confirm against the
# original script before relying on it.
if len(sys.argv) == 2:
fname=sys.argv[1]
# The integers between character 7 and the first '.' of the file name,
# separated by '_', are taken as d, s, m and r.
d, s, m, r = [int(i) for i in fname[7:fname.index('.')].split("_")]
# s == 0 turns the shrinking/expanding layers off; s then takes the value of d.
if s == 0:
s=d
shrinking = False
else:
shrinking = True
# scale and comps are module globals; header5 and header6 read them.
global scale, comps
deconv_biases = read_weights(fname, get_line_number("deconv_b", fname))
# scale = integer square root of the number of comma-separated values in the
# first "deconv_b" entry.
scale = int(math.sqrt(len(deconv_biases[0].split(","))))
# Output name: '_' -> '-', "weights" -> "FSRCNNX_x<scale>_", "txt" -> "glsl".
dst = fname.replace("_", "-").replace("weights",
"FSRCNNX_x{}_".format(scale)).replace("txt", "glsl")
with open(dst, 'w') as file:
# Feature layer
# One pass per group of four output channels, over a
# (2*feature_radius+1) x (2*feature_radius+1) window of LUMA texels.
feature_radius = 2
ln = get_line_number("w1", fname)
weights = read_weights(fname, ln, (feature_radius*2+1)**2)
ln = get_line_number("b1", fname)
biases = read_weights(fname, ln)
for n in range(0, d, 4):
header1(file, n, d)
file.write('vec4 hook()\n')
file.write('{\n')
file.write('vec4 res = vec4({});\n'.format(format_weights(biases[0], n)))
# p counts window positions; it is decomposed into (x, y) offsets below.
p=0
for l in range(0, len(weights)):
y, x = p%(feature_radius*2+1)-feature_radius, p//(feature_radius*2+1)-
feature_radius
p += 1
file.write('res += vec4({}) *
float(LUMA_texOff(vec2({},{})));\n'.format(format_weights(weights[l], n), x, y))
# The max/min expression written below is only emitted when shrinking is on.
if shrinking:
ln = get_line_number("alpha1", fname)
alphas = read_weights(fname, ln)
file.write('res = max(res, vec4(0.0)) + vec4({}) * min(res,
vec4(0.0));\n'.format(format_weights(alphas[0], n)))
file.write('return res;\n')
file.write('}\n\n')
if shrinking:
# Shrinking layer
ln = get_line_number("w2", fname)
weights = read_weights(fname, ln, d)
# NOTE(review): `ln` for "b2" is looked up but no biases are read, and the
# shader body below has no 'vec4 hook()' / 'return res;' lines, unlike the
# feature layer -- statements appear to be missing here.
ln = get_line_number("b2", fname)
for n in range(0, s, 4):
header2(file, d, n, s)
file.write('{\n')
for l in range(0, d, 4):
file.write('res += mat4({},{},{},{}) *
FEATURE{}_texOff(vec2(0.0));\n'.format(format_weights(weights[l], n),
format_weights(weights[l+1], n), format_weights(weights[l+2], n),
format_weights(weights[l+3], n), l//4+1))
file.write('}\n\n')
# Mapping layers
inp = "SHRINKED" if shrinking else "FEATURE"
for ri in range(r):
for mi in range(m):
# Input texture prefix: `inp` for the very first pass, "RES" for the first
# layer of each later repetition, "MODEL" otherwise.
tex_name = inp if ri == 0 and mi == 0 else "RES" if ri > 0 and mi == 0 else "MODEL"
ln = get_line_number("w{}".format(mi + 3), fname)
weights = read_weights(fname, ln, s*9)
ln = get_line_number("b{}".format(mi + 3), fname)
# NOTE(review): `n` here is whatever an earlier loop left behind; a
# `for n in range(0, s, 4)` loop appears to be missing from this listing.
header3(file, ri, mi, m, n, s, tex_name)
file.write('{\n')
p=0
for l in range(0, len(weights), 4):
# A new 3x3 window position starts every s weight rows.
if l % s == 0:
y, x = p%3-1, p//3-1
p += 1
idx = (l//4)%(s//4)
# NOTE(review): the format string has eight placeholders but only six
# arguments are visible -- the weights[l+2]/weights[l+3] arguments appear
# to have been lost.
file.write('res += mat4({},{},{},{}) * {}{}_texOff(vec2({},{}));\n'.format(
format_weights(weights[l], n), format_weights(weights[l+1], n),
tex_name, idx + 1 + (20 if (ri * m + mi) % 2 == 1 else 0), x, y))
# NOTE(review): this line number is looked up but not used in the visible code.
ln = get_line_number("alpha{}".format(m + 3 if mi == m - 1 else mi + 4), fname)
file.write('}\n\n')
# After the last mapping layer of a repetition, a sub-band residual pass is
# written that reads the MODEL textures and adds `inp` back.
if mi == m - 1:
ln = get_line_number("w{}".format(m + 3), fname)
weights = read_weights(fname, ln, s*(mi+2))
ln = get_line_number("b{}".format(m + 3), fname)
header3_1(file, ri, mi, m, n, s, inp)
file.write('{\n')
for l in range(0, s, 4):
# NOTE(review): four placeholders for the mat4 but only two weight
# arguments are visible -- arguments appear to be missing.
file.write('res += mat4({},{},{},{}) * MODEL{}_texOff(0);\n'.format(
format_weights(weights[l], n), format_weights(weights[l+1], n),
l//4 + 1 + (20 if (ri * m + mi) % 2 == 0 else 0)))
file.write('res += {}{}_texOff(0);\n'.format(inp, (n//4)%(s//4) + 1))
if ri == r - 1:
ln = get_line_number("alpha2", fname)
file.write('}\n\n')
if shrinking:
# Expanding layer
ln = get_line_number("w{}".format(m + 4), fname)
weights = read_weights(fname, ln, d)
# NOTE(review): the "b" and "alpha" line numbers are looked up back to back
# without being used in the visible code.
ln = get_line_number("b{}".format(m + 4), fname)
ln = get_line_number("alpha{}".format(m + 4), fname)
for n in range(0, d, 4):
header4(file, s, m, r, n, d)
file.write('{\n')
for l in range(0, s, 4):
# NOTE(review): four mat4 placeholders plus the RES index, but only three
# arguments are visible -- arguments appear to be missing.
file.write('res += mat4({},{},{},{}) *
RES{}_texOff(vec2(0.0));\n'.format(format_weights(weights[l], n),
format_weights(weights[l+3], n),
l//4 + 1))
file.write('}\n\n')
# Sub-pixel convolution: one pass per group of `comps` output channels.
# NOTE(review): `radius` is not assigned anywhere in the visible code, and
# `biases` still holds the values read for "b1" unless a lost line re-reads it.
ln = get_line_number("deconv_w", fname)
weights = read_weights(fname, ln, d*(radius*2+1)**2)
ln = get_line_number("deconv_b", fname)
inp = "EXPANDED" if shrinking else "RES"
# Three components per pass for scale 3, four otherwise.
comps = 3 if scale == 3 else 4
for n in range(0, scale**2, comps):
header5(file, n, d, inp)
file.write('{\n')
file.write('vec{0} res = vec{0}({1});\n'.format(comps, format_weights(biases[0], n,
length=comps)))
p=0
for l in range(0, len(weights), 4):
if l % d == 0:
y, x = p%(radius*2+1)-radius, p//(radius*2+1)-radius
p += 1
idx = (l//4)%(d//4)
# NOTE(review): more placeholders than visible arguments -- arguments
# appear to be missing.
file.write('res += mat4x{}({},{},{},{}) * {}{}_texOff(vec2({},{}));\n'.format(
comps, format_weights(weights[l], n, length=comps),
format_weights(weights[l+1], n, length=comps),
inp, idx + 1, x, y))
if comps == 4:
# NOTE(review): the body of this branch is missing from the listing.
else:
file.write('return vec4(res, 0);\n')
file.write('}\n\n')
# Aggregation
# Picks, for each output pixel, the sub-pixel value selected by the
# fractional position inside the SUBCONV1 texel.
header6(file)
file.write('{\n')
file.write('vec2 fcoord = fract(SUBCONV1_pos * SUBCONV1_size);\n')
file.write('vec2 base = SUBCONV1_pos + (vec2(0.5) - fcoord) * SUBCONV1_pt;\n')
file.write('ivec2 index = ivec2(fcoord * vec2({}));\n'.format(scale))
# For scale > 2 the SUBCONV textures are gathered into a scale x scale matrix;
# otherwise a single vec4 from SUBCONV1 is indexed directly.
if scale > 2:
file.write('mat{0} res = mat{0}(SUBCONV1_tex(base).{1}'.format(scale,
"rgba"[:comps]))
for i in range(scale-1):
file.write(',SUBCONV{}_tex(base).{}'.format(i + 2, "rgba"[:comps]))
file.write(');\n')
file.write('return vec4(res[index.x][index.y], 0, 0, 1);\n')
else:
file.write('vec4 res = SUBCONV1_tex(base);\n')
file.write('return vec4(res[index.x * {} + index.y], 0, 0, 1);\n'.format(scale))
file.write('}\n')
else:
# Any argument count other than exactly one file name is rejected.
print("Missing argument: You must specify a file name")
return
# Run the converter only when this file is executed as a script.
if __name__ == '__main__':
main()
ESPCN.py

self.name = "ESPCN"
self.model_params = [64, 32] #[64, 32, 28]
def model(self):
# Creates the 'w1'/'b1', 'w{i}'/'b{i}' and 'deconv_w' variables, applies
# tf.nn.conv2d to self.images and returns `deconv`.
# NOTE(review): several statements in this listing are cut off (unclosed
# `tf.get_variable(` calls, dangling `data_format='NHWC')` lines), and `size`
# and `deconv` are never assigned in the visible lines -- confirm against the
# original source.
# d is self.model_params; m = len(d) + 2 bounds the layer loop, so i runs
# from 3 to m - 1 and each 'w{i}' has shape [3, 3, d[i-3], d[i-2]].
d = self.model_params
m = len(d) + 2
weights = tf.get_variable('w1', shape=[size, size, 1, d[0]],
biases = tf.get_variable('b1', initializer=tf.zeros([d[0]]))
conv = tf.nn.conv2d(self.images, weights, strides=[1,1,1,1], padding='VALID',
data_format='NHWC')

for i in range(3, m):
weights = tf.get_variable('w{}'.format(i), shape=[3, 3, d[i-3], d[i-2]],
biases = tf.get_variable('b{}'.format(i), initializer=tf.zeros([d[i-2]]))
data_format='NHWC')
deconv_weights = tf.get_variable('deconv_w', shape=[size, size, d[-1], self.scale**2],
data_format='NHWC')
data_format='NHWC')
return deconv

"""
"""

LapSRN.py

self.name = "ESPCN"
self.model_params = [64, 32] #[64, 32, 28]
def model(self):
# Creates the 'w1'/'b1', 'w{i}'/'b{i}' and 'deconv_w' variables, applies
# tf.nn.conv2d to self.images and returns `deconv`.
# NOTE(review): this listing is headed "LapSRN.py", yet the lines just above
# set self.name = "ESPCN" and the body is line-for-line the same as the
# ESPCN.py listing -- confirm that the right source was pasted here.
# NOTE(review): several statements are cut off (unclosed `tf.get_variable(`
# calls, dangling `data_format='NHWC')` lines), and `size` and `deconv` are
# never assigned in the visible lines -- confirm against the original source.
# d is self.model_params; m = len(d) + 2 bounds the layer loop, so i runs
# from 3 to m - 1 and each 'w{i}' has shape [3, 3, d[i-3], d[i-2]].
d = self.model_params
m = len(d) + 2
weights = tf.get_variable('w1', shape=[size, size, 1, d[0]],
biases = tf.get_variable('b1', initializer=tf.zeros([d[0]]))
conv = tf.nn.conv2d(self.images, weights, strides=[1,1,1,1], padding='VALID',
data_format='NHWC')

for i in range(3, m):
weights = tf.get_variable('w{}'.format(i), shape=[3, 3, d[i-3], d[i-2]],
biases = tf.get_variable('b{}'.format(i), initializer=tf.zeros([d[i-2]]))
data_format='NHWC')
deconv_weights = tf.get_variable('deconv_w', shape=[size, size, d[-1], self.scale**2],
data_format='NHWC')
data_format='NHWC')
return deconv

"""
"""

User Interface Representation

Fig. 5.1. TESTING
Original Image | Super-Resolved Image

Training Data
Snapshots of the system, with a brief description of each

Training Dataset -
Dataset
Result -
Interpolated and Super-resolved image

IDE -
Anaconda Spyder IDE

AI

Загружено:

Сведения о документе

Авторское право

Доступные форматы

Поделиться этим документом

Поделиться или встроить документ

Параметры публикации

Этот документ был вам полезен?

Это неприемлемый материал?

Авторское право:

Доступные форматы

AI

Загружено:

Авторское право:

Доступные форматы

Super Resolution using Convolutional Neural Network

Submitted in fulfillment of the requirements of

Submitted by : Submitted to:

Department of Computer Science and Engineering

from model import Model

with tf.Session() as sess:

def __init__(self, config):

def prelu(self, _x, i):

return tf.nn.relu(_x) - alphas * tf.nn.relu(-_x)

def loss(self, Y, X):

def __init__(self, config):

conv = tf.contrib.layers.conv2d(features, d, 3, 1, 'SAME', 'NHWC', activation_fn=None,

for i in range(1, m+1):

sa = tf.contrib.layers.separable_conv2d(res, None, 3, 1, 1, 'SAME', 'NHWC',

ca = tf.reduce_mean(tf.square(res), [1, 2], True) - tf.square(tf.reduce_mean(res, [1,

conv = tf.add(conv, tf.add(res, tf.multiply(res, tf.sigmoid(tf.add(sa, ca)))))

conv = tf.concat([conv, shortcuts], -1)

conv = tf.contrib.layers.conv2d(conv, d, 3, 1, 'SAME', 'NHWC', activation_fn=None,

def loss(self, Y, X):

def preprocess(path, scale=3, distort=False):

(width, height) = image.size

(new_width, new_height) = width // scale, height // scale

if distort==True and randrange(3) == 0:

input_ = np.frombuffer(scaled_image.tobytes(), dtype=np.uint8).reshape((new_height,

return input_ / 255, label_ / 255

def prepare_data(sess, dataset):

def modcrop(image, scale=3):

We need to find modulo of height (and width) and scale factor.

input_, label_ = preprocess(image_data, scale, distort=distort)

for x in range(0, h - image_size + 1, stride):

sub_input = sub_input.reshape([image_size, image_size, 1])

return [single_input_sequence, single_label_sequence]

# Load data path

# Initialize multiprocessing pool with # of processes = config.threads

# Distribute |images_per_thread| images across each worker process

print("All worker processes done!")

sub_input_sequence, sub_label_sequence = [], []

for image in range(len(results)):

return (arrdata, arrlabel)

# Load data path

sub_input_sequence, sub_label_sequence = [], []

for x in range(0, h - image_size + 1, stride):

sub_input = sub_input.reshape([image_size, image_size, 1])

return (arrdata, arrlabel)

# Load data path

input_, label_ = preprocess(data[2], config.scale)

arrdata = np.pad(input_.reshape([1, h, w, 1]), ((0,0),(2,2),(2,2),(0,0)), 'reflect')

arrlabel = label_.reshape([1, h, w, 1])

return (arrdata, arrlabel)

def merge(config, Y):

data = prepare_data(config.sess, dataset="Test")

img = np.concatenate((Y, CbCr), axis=-1)

def save_params(sess, params):

h = open(param_dir + "weights{}.txt".format('_'.join(str(i) for i in params)), 'w')

variables = dict((var.name, sess.run(var)) for var in tf.trainable_variables())

for name, weights in variables.items():

def array_image_save(array, image_path):

def _tf_fspecial_gauss(size, sigma):

x_data = np.expand_dims(x_data, axis=-1)

y_data = np.expand_dims(y_data, axis=-1)

def tf_ssim(img1, img2, cs_map=False, mean_metric=True, sigma=1.5):

def tf_ms_ssim(img1, img2, sigma=1.5, weights=[0.1, 0.9]):

value = tf.reduce_sum(tf.multiply(tf.stack(mssim), weights))

def bilinear_upsample_weights(factor, channels):

def init(self, config):

def init(self, config):

def init(self, sess, config):

def init(self, config):

def init(self, config):