From 9cea479fe38826309a3a368fbf7db9e5309025bc Mon Sep 17 00:00:00 2001 From: thomas2001u Date: Thu, 20 Oct 2022 01:45:06 -0700 Subject: [PATCH 01/15] Create modules.py Created modules.py and partially implemented vector quantizer class. --- recognition/s4641500/modules.py | 42 +++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 recognition/s4641500/modules.py diff --git a/recognition/s4641500/modules.py b/recognition/s4641500/modules.py new file mode 100644 index 0000000000..d8e354e839 --- /dev/null +++ b/recognition/s4641500/modules.py @@ -0,0 +1,42 @@ +import numpy as np +import matplotlib.pyplot as plt +from tensorflow import keras +from tensorflow.keras import layers +import tensorflow_probability as tfp +import tensorflow as tf + + +# vector quantizer class +class VQ(layers.Layer): + def __init__(self, embed_n, embed_d, beta=0.25, **kwargs): + super().__init__(**kwargs) + self.embed_d = embed_d + self.embed_n = embed_n + self.beta = beta + + # fnitialise embeddings to be quantized + w_init = tf.random_uniform_initializer() + self.embeds = tf.Variable( + initial_value=w_init(shape=(self.embed_d, self.embed_n), dtype="float32"), + trainable=True, + name="vqvae-embeddings", + ) + + def call(self, x): + # flatten inputs while maintaining embed_d then quantize + shape = tf.shape(x) + flattened = tf.reshape(x, [-1, self.embed_d]) + encoding_indices = self.get_code_indices(flattened) + encodings = tf.one_hot(encoding_indices, self.embed_n) + quantized = tf.matmul(encodings, self.embeddings, transpose_b=True) + + # get back original shape + quantized = tf.reshape(quantized, shape) + + # loss + c_loss = tf.reduce_mean((tf.stop_gradient(quantized) - x) ** 2) + cb_loss = tf.reduce_mean((quantized - tf.stop_gradient(x)) ** 2) + self.add_loss(self.beta * c_loss + cb_loss) + + quantized = x + tf.stop_gradient(quantized - x) + return quantized \ No newline at end of file From fe3b8d7f1459bb68bc6ebc74f182c9c4f50226d1 Mon Sep 17 00:00:00 2001 From: thomas2001u Date: Thu, 20 Oct 2022 06:33:48 -0700 Subject: [PATCH 02/15] Update modules.py Added encoder and decoder for keras layers --- recognition/s4641500/modules.py | 36 ++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/recognition/s4641500/modules.py b/recognition/s4641500/modules.py index d8e354e839..bf7c64e82d 100644 --- a/recognition/s4641500/modules.py +++ b/recognition/s4641500/modules.py @@ -39,4 +39,38 @@ def call(self, x): self.add_loss(self.beta * c_loss + cb_loss) quantized = x + tf.stop_gradient(quantized - x) - return quantized \ No newline at end of file + return quantized + + def get_indices(self, flattened): + # l2-normalised distance between input and codes + similarity = tf.matmul(flattened, self.embeds) + dists = ( + tf.reduce_sum(flattened**2, axis=1, keepdims=True) + + tf.reduce_sum(self.embeds**2, axis=0) + - 2*similarity + ) + + # get best indices + encode_indices = tf.argmin(dists, axis=1) + return encode_indices + +def get_encoder(dim=16): + inputs = keras.Input(shape=(28, 28, 1)) + x = layers.Conv2D(32, 3, activation="relu", strides=2, padding="same")( + inputs + ) + x = layers.Conv2D(64, 3, activation="relu", strides=2, padding="same")(x) + out = layers.Conv2D(dim, 1, padding="same")(x) + return keras.Model(inputs, out, name="encoder") + + +def get_decoder(dim=16): + inputs = keras.Input(shape=get_encoder(dim).output.shape[1:]) + x = layers.Conv2DTranspose(64, 3, activation="relu", strides=2, padding="same")( + inputs + ) + x = 
layers.Conv2DTranspose(32, 3, activation="relu", strides=2, padding="same")(x) + out = layers.Conv2DTranspose(1, 3, padding="same")(x) + return keras.Model(inputs, out, name="decoder") + + From 593ba6013e12c4036e0cc4d43dcd0ee3804ee386 Mon Sep 17 00:00:00 2001 From: thomas2001u Date: Thu, 20 Oct 2022 06:53:00 -0700 Subject: [PATCH 03/15] Create train.py Basic trainer class. Train steps which backpropagate and check loss. --- recognition/s4641500/train.py | 44 +++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 recognition/s4641500/train.py diff --git a/recognition/s4641500/train.py b/recognition/s4641500/train.py new file mode 100644 index 0000000000..c6b83e8927 --- /dev/null +++ b/recognition/s4641500/train.py @@ -0,0 +1,44 @@ +class VQVAETrainer(keras.models.Model): + def __init__(self, train_variance, dim=32, embed_n=128, **kwargs): + super(VQVAETrainer, self).__init__(**kwargs) + self.train_variance = train_variance + self.dim = dim + self.embed_n = embed_n + self.vqvae = get_vqvae(self.dim, self.embed_n) + self.total_loss = keras.metrics.Mean(name="total_loss") + self.reconstruction_loss = keras.metrics.Mean( + name="reconstruction_loss" + ) + self.vq_loss = keras.metrics.Mean(name="vq_loss") + + @property + def metrics(self): + return [ + self.total_loss, + self.reconstruction_loss, + self.vq_loss, + ] + + def train_step(self, x): + with tf.GradientTape() as tape: + reconstructions = self.vqvae(x) + + # calculate loss + reconstruction_loss = ( + tf.reduce_mean((x - reconstructions) ** 2) / self.train_variance + ) + total_loss = reconstruction_loss + sum(self.vqvae.losses) + + # backpropagate + grads = tape.gradient(total_loss, self.vqvae.trainable_variables) + self.optimizer.apply_gradients(zip(grads, self.vqvae.trainable_variables)) + + # track loss + self.total_loss.update_state(total_loss) + self.reconstruction_loss.update_state(reconstruction_loss) + self.vq_loss.update_state(sum(self.vqvae.losses)) + return { + "loss": self.total_loss.result(), + "reconstruction_loss": self.reconstruction_loss.result(), + "vqvae_loss": self.vq_loss.result(), + } From ab50072dda6412343876562395ae141d57d0baa5 Mon Sep 17 00:00:00 2001 From: thomas2001u Date: Thu, 20 Oct 2022 08:27:53 -0700 Subject: [PATCH 04/15] Create dataset.py Implemented data loading and preprocessing. --- recognition/s4641500/dataset.py | 46 +++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 recognition/s4641500/dataset.py diff --git a/recognition/s4641500/dataset.py b/recognition/s4641500/dataset.py new file mode 100644 index 0000000000..0f47cb1bb9 --- /dev/null +++ b/recognition/s4641500/dataset.py @@ -0,0 +1,46 @@ +from modules import * +import glob +import os +from PIL import Image + +# find and initialise dataset +train_path = "keras_png_slices_data/keras_png_slices_train" +test_path = "keras_png_slices_data/keras_png_slices_test" +validation_path = "keras_png_slices_data/keras_png_slices_validate" +train_files = os.listdir(train_path) +test_files = os.listdir(test_path) +validation_files = os.listdir(validation_path) + +# data dimensions +IMG_H = 80 +IMG_W = 80 + + +def load_images(p, image_path): + """ + Returns a list of resized images at the given path. 
+ """ + images = [] + + for file in image_path: + image = Image.open(p + '/' + file) + image = image.resize((IMG_H, IMG_W)) + image = np.reshape(image, (IMG_H, IMG_W, 1)) + images.append(image) + return images + +train_imgs = load_images(train_path, train_files) +test_imgs = load_images(test_path, test_files) +x_train = np.array(train_imgs) +x_test = np.array(test_imgs) + +# normalise data to [-0.5, 0.5] +x_train_scaled = (x_train / 255.0) - 0.5 +x_test_scaled = (x_test / 255.0) - 0.5 + +# get variance for mse +data_variance = np.var(x_train / 255.0) + +# Check shapes of arrays +print(x_train.shape) +print(data_variance.shape) \ No newline at end of file From f9daab55122670b7051122f1eeda68354d918cd2 Mon Sep 17 00:00:00 2001 From: thomas2001u Date: Thu, 20 Oct 2022 09:48:58 -0700 Subject: [PATCH 05/15] Update modules.py Moved VQVAE train class from train to modules. --- recognition/s4641500/modules.py | 111 ++++++++++++++++++++++++++++++-- 1 file changed, 105 insertions(+), 6 deletions(-) diff --git a/recognition/s4641500/modules.py b/recognition/s4641500/modules.py index bf7c64e82d..357e667c41 100644 --- a/recognition/s4641500/modules.py +++ b/recognition/s4641500/modules.py @@ -28,7 +28,7 @@ def call(self, x): flattened = tf.reshape(x, [-1, self.embed_d]) encoding_indices = self.get_code_indices(flattened) encodings = tf.one_hot(encoding_indices, self.embed_n) - quantized = tf.matmul(encodings, self.embeddings, transpose_b=True) + quantized = tf.matmul(encodings, self.embeds, transpose_b=True) # get back original shape quantized = tf.reshape(quantized, shape) @@ -41,7 +41,7 @@ def call(self, x): quantized = x + tf.stop_gradient(quantized - x) return quantized - def get_indices(self, flattened): + def get_code_indices(self, flattened): # l2-normalised distance between input and codes similarity = tf.matmul(flattened, self.embeds) dists = ( @@ -54,8 +54,55 @@ def get_indices(self, flattened): encode_indices = tf.argmin(dists, axis=1) return encode_indices -def get_encoder(dim=16): - inputs = keras.Input(shape=(28, 28, 1)) + +class Train_VQVAE(keras.models.Model): + def __init__(self, train_variance, dim=32, embed_n=128, **kwargs): + super(Train_VQVAE, self).__init__(**kwargs) + self.train_variance = train_variance + self.dim = dim + self.embed_n = embed_n + self.vqvae = get_vqvae(self.dim, self.embed_n) + self.total_loss = keras.metrics.Mean(name="total_loss") + self.reconstruction_loss = keras.metrics.Mean( + name="reconstruction_loss" + ) + self.vq_loss = keras.metrics.Mean(name="vq_loss") + + @property + def metrics(self): + return [ + self.total_loss, + self.reconstruction_loss, + self.vq_loss, + ] + + def train_step(self, x): + with tf.GradientTape() as tape: + reconstructions = self.vqvae(x) + + # calculate loss + reconstruction_loss = ( + tf.reduce_mean((x - reconstructions) ** 2) / self.train_variance + ) + total_loss = reconstruction_loss + sum(self.vqvae.losses) + + # backpropagate + grads = tape.gradient(total_loss, self.vqvae.trainable_variables) + self.optimizer.apply_gradients(zip(grads, self.vqvae.trainable_variables)) + + # track loss + self.total_loss.update_state(total_loss) + self.reconstruction_loss.update_state(reconstruction_loss) + self.vq_loss.update_state(sum(self.vqvae.losses)) + return { + "loss": self.total_loss.result(), + "reconstruction_loss": self.reconstruction_loss.result(), + "vqvae_loss": self.vq_loss.result(), + } + + +def encoder(dim=16): + inputs = keras.Input(shape=(80, 80, 1)) x = layers.Conv2D(32, 3, activation="relu", strides=2, padding="same")( 
inputs ) @@ -64,8 +111,8 @@ def get_encoder(dim=16): return keras.Model(inputs, out, name="encoder") -def get_decoder(dim=16): - inputs = keras.Input(shape=get_encoder(dim).output.shape[1:]) +def decoder(dim=16): + inputs = keras.Input(shape=encoder(dim).output.shape[1:]) x = layers.Conv2DTranspose(64, 3, activation="relu", strides=2, padding="same")( inputs ) @@ -74,3 +121,55 @@ def get_decoder(dim=16): return keras.Model(inputs, out, name="decoder") +def get_vqvae(dim=16, embed_n=64): + vq_layer = VQ(embed_n, dim, name="vector_quantizer") + enc = encoder(dim) + dec = decoder(dim) + inputs = keras.Input(shape=(80, 80, 1)) + out = enc(inputs) + quantized_latents = vq_layer(out) + reconstructions = dec(quantized_latents) + return keras.Model(inputs, reconstructions, name="vq_vae") + + +get_vqvae().summary() + + +# builds on the 2D convolutional layer, but includes masking +class PixelConvLayer(layers.Layer): + def __init__(self, mask_type, **kwargs): + super(PixelConvLayer, self).__init__() + self.mask_type = mask_type + self.conv = layers.Conv2D(**kwargs) + + def build(self, input): + # initialize kernel variables + self.conv.build(input) + # create the mask + kernel_shape = self.conv.kernel.get_shape() + self.mask = np.zeros(shape=kernel_shape) + self.mask[: kernel_shape[0] // 2, ...] = 1.0 + self.mask[kernel_shape[0] // 2, : kernel_shape[1] // 2, ...] = 1.0 + if self.mask_type == "B": + self.mask[kernel_shape[0] // 2, kernel_shape[1] // 2, ...] = 1.0 + + def call(self, inputs): + self.conv.kernel.assign(self.conv.kernel * self.mask) + return self.conv(inputs) + + +# residual block layer +class ResBlock(keras.layers.Layer): + def __init__(self, filters, **kwargs): + super(ResBlock, self).__init__(**kwargs) + self.conv_1 = keras.layers.Conv2D(filters=filters, kernel_size=1, activation="relu") + self.pixel_conv = PixelConvLayer(mask_type="B", filters=filters // 2, kernel_size=3, + activation="relu", padding="same",) + self.conv_2 = keras.layers.Conv2D(filters=filters, kernel_size=1, activation="relu") + + def call(self, inputs): + conv = self.conv_1(inputs) + conv = self.pixel_conv(conv) + conv = self.conv_2(conv) + return keras.layers.add([inputs, conv]) + From dd5965a1907cc435672d3987615179c3eba87490 Mon Sep 17 00:00:00 2001 From: thomas2001u Date: Thu, 20 Oct 2022 10:24:54 -0700 Subject: [PATCH 06/15] Update train.py Implement pixel cnn. Basic functionality works. 
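For context: the PixelCNN added here learns an autoregressive prior over the discrete codebook indices produced by the trained VQ-VAE encoder, and the masked convolutions are what stop each position from seeing "future" pixels. A rough NumPy sketch of the mask that PixelConvLayer builds (the causal_mask helper below is illustrative only, not part of this commit):

    import numpy as np

    def causal_mask(kernel_h, kernel_w, mask_type="A"):
        # Allow only pixels above the centre row, plus pixels to the left
        # within the centre row; type "B" also allows the centre pixel itself.
        mask = np.zeros((kernel_h, kernel_w), dtype="float32")
        mask[: kernel_h // 2, :] = 1.0
        mask[kernel_h // 2, : kernel_w // 2] = 1.0
        if mask_type == "B":
            mask[kernel_h // 2, kernel_w // 2] = 1.0
        return mask

    print(causal_mask(3, 3, "A"))
    # [[1. 1. 1.]
    #  [1. 0. 0.]
    #  [0. 0. 0.]]

The layer multiplies its kernel by this mask on every call, so the first (type "A") layer never sees the current pixel, while the later (type "B") layers may use it.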
--- recognition/s4641500/train.py | 121 +++++++++++++++++++++------------- 1 file changed, 77 insertions(+), 44 deletions(-) diff --git a/recognition/s4641500/train.py b/recognition/s4641500/train.py index c6b83e8927..e36e01068e 100644 --- a/recognition/s4641500/train.py +++ b/recognition/s4641500/train.py @@ -1,44 +1,77 @@ -class VQVAETrainer(keras.models.Model): - def __init__(self, train_variance, dim=32, embed_n=128, **kwargs): - super(VQVAETrainer, self).__init__(**kwargs) - self.train_variance = train_variance - self.dim = dim - self.embed_n = embed_n - self.vqvae = get_vqvae(self.dim, self.embed_n) - self.total_loss = keras.metrics.Mean(name="total_loss") - self.reconstruction_loss = keras.metrics.Mean( - name="reconstruction_loss" - ) - self.vq_loss = keras.metrics.Mean(name="vq_loss") - - @property - def metrics(self): - return [ - self.total_loss, - self.reconstruction_loss, - self.vq_loss, - ] - - def train_step(self, x): - with tf.GradientTape() as tape: - reconstructions = self.vqvae(x) - - # calculate loss - reconstruction_loss = ( - tf.reduce_mean((x - reconstructions) ** 2) / self.train_variance - ) - total_loss = reconstruction_loss + sum(self.vqvae.losses) - - # backpropagate - grads = tape.gradient(total_loss, self.vqvae.trainable_variables) - self.optimizer.apply_gradients(zip(grads, self.vqvae.trainable_variables)) - - # track loss - self.total_loss.update_state(total_loss) - self.reconstruction_loss.update_state(reconstruction_loss) - self.vq_loss.update_state(sum(self.vqvae.losses)) - return { - "loss": self.total_loss.result(), - "reconstruction_loss": self.reconstruction_loss.result(), - "vqvae_loss": self.vq_loss.result(), - } +from dataset import * +from modules import * + +EPOCHS = 20 + +# initialise and train +vqvae_trainer = Train_VQVAE(data_variance, dim=16, embed_n=128) +vqvae_trainer.compile(optimizer=keras.optimizers.Adam()) +vqvae_trainer.fit(x_train_scaled, epochs=EPOCHS, batch_size=128) + +# test trained model +encoder = vqvae_trainer.vqvae.get_layer("encoder") +quantizer = vqvae_trainer.vqvae.get_layer("vector_quantizer") +trained_vqvae_model = vqvae_trainer.vqvae +idx = np.random.choice(len(x_test_scaled), 10) +test_images = x_test_scaled[idx] +reconstructions_test = trained_vqvae_model.predict(test_images) +encoded_outputs = encoder.predict(test_images) +flat_enc_outputs = encoded_outputs.reshape(-1, encoded_outputs.shape[-1]) +codebook_indices = quantizer.get_code_indices(flat_enc_outputs) +codebook_indices = codebook_indices.numpy().reshape(encoded_outputs.shape[:-1]) + +num_residual_blocks = 2 +num_pixelcnn_layers = 2 +pixelcnn_input_shape = encoded_outputs.shape[1:-1] +print(f"Input shape of the PixelCNN: {pixelcnn_input_shape}") + +pixelcnn_inputs = keras.Input(shape=pixelcnn_input_shape, dtype=tf.int32) +ohe = tf.one_hot(pixelcnn_inputs, vqvae_trainer.embed_n) +x = PixelConvLayer( + mask_type="A", filters=128, kernel_size=7, activation="relu", padding="same" +)(ohe) + +for _ in range(num_residual_blocks): + x = ResBlock(filters=128)(x) + +for _ in range(num_pixelcnn_layers): + x = PixelConvLayer( + mask_type="B", + filters=128, + kernel_size=1, + strides=1, + activation="relu", + padding="valid", + )(x) + +out = keras.layers.Conv2D( + filters=vqvae_trainer.embed_n, kernel_size=1, strides=1, padding="valid" +)(x) + +pcnn = keras.Model(pixelcnn_inputs, out, name="pixel_cnn") +pcnn.summary() + +# generate the codebook indices. 
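+# (with 80x80 inputs and two stride-2 convolutions, each image becomes a 20x20 grid of indices into the 128-entry codebook)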
+encoded_outputs = encoder.predict(x_train_scaled) +# reduce indices because my VRAM is insufficient +encoded_outputs = encoded_outputs[:len(encoded_outputs) // 2] +flat_enc_outputs = encoded_outputs.reshape(-1, encoded_outputs.shape[-1]) +codebook_indices = quantizer.get_code_indices(flat_enc_outputs) +codebook_indices = codebook_indices.numpy().reshape(encoded_outputs.shape[:-1]) +print(f"Shape of the training data for PixelCNN: {codebook_indices.shape}") + +# compile and train pcnn +pcnn.compile( + optimizer=keras.optimizers.Adam(3e-4), + loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), + metrics=["accuracy"], +) +pcnn.fit( + x=codebook_indices, + y=codebook_indices, + batch_size=128, + epochs=EPOCHS, + validation_split=0.2, +) + +pcnn.save("saved model/model") \ No newline at end of file From dcdef952bc1e7bd58c096ec56ae46fea2c837415 Mon Sep 17 00:00:00 2001 From: thomas2001u Date: Thu, 20 Oct 2022 12:59:18 -0700 Subject: [PATCH 07/15] Output trained models Added class methods in modules.py allowing models to be saved in .h5 format. Model fully trains but performance metrics are yet to be separated from train.py. --- recognition/s4641500/dataset.py | 39 +++--- recognition/s4641500/modules.py | 71 ++++++++++- recognition/s4641500/train.py | 220 +++++++++++++++++++++++--------- 3 files changed, 247 insertions(+), 83 deletions(-) diff --git a/recognition/s4641500/dataset.py b/recognition/s4641500/dataset.py index 0f47cb1bb9..e190cae9d9 100644 --- a/recognition/s4641500/dataset.py +++ b/recognition/s4641500/dataset.py @@ -3,13 +3,10 @@ import os from PIL import Image -# find and initialise dataset -train_path = "keras_png_slices_data/keras_png_slices_train" -test_path = "keras_png_slices_data/keras_png_slices_test" -validation_path = "keras_png_slices_data/keras_png_slices_validate" -train_files = os.listdir(train_path) -test_files = os.listdir(test_path) -validation_files = os.listdir(validation_path) +# load paths +TR_PATH = "keras_png_slices_data/keras_png_slices_train" +TST_PATH = "keras_png_slices_data/keras_png_slices_test" +V_PATH = "keras_png_slices_data/keras_png_slices_validate" # data dimensions IMG_H = 80 @@ -29,18 +26,22 @@ def load_images(p, image_path): images.append(image) return images -train_imgs = load_images(train_path, train_files) -test_imgs = load_images(test_path, test_files) -x_train = np.array(train_imgs) -x_test = np.array(test_imgs) -# normalise data to [-0.5, 0.5] -x_train_scaled = (x_train / 255.0) - 0.5 -x_test_scaled = (x_test / 255.0) - 0.5 +def load_dataset(): + train_files = os.listdir(TR_PATH) + test_files = os.listdir(TST_PATH) + validation_files = os.listdir(V_PATH) # obsolete? 
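+    # note: the validation files are listed but never loaded; only the train and test arrays are returned below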
-# get variance for mse -data_variance = np.var(x_train / 255.0) + # load images from path + train_imgs = load_images(TR_PATH, train_files) + test_imgs = load_images(TST_PATH, test_files) + x_train = np.array(train_imgs) + x_test = np.array(test_imgs) -# Check shapes of arrays -print(x_train.shape) -print(data_variance.shape) \ No newline at end of file + # normalise data to [-0.5, 0.5] + x_train_scaled = (x_train / 255.0) - 0.5 + x_test_scaled = (x_test / 255.0) - 0.5 + + # get variance for mse + data_variance = np.var(x_train / 255.0) + return (train_imgs, test_imgs, x_train_scaled, x_test_scaled, data_variance) \ No newline at end of file diff --git a/recognition/s4641500/modules.py b/recognition/s4641500/modules.py index 357e667c41..e0e51e557d 100644 --- a/recognition/s4641500/modules.py +++ b/recognition/s4641500/modules.py @@ -54,6 +54,17 @@ def get_code_indices(self, flattened): encode_indices = tf.argmin(dists, axis=1) return encode_indices + def get_config(self): + config = super().get_config() + config.update({ + "embed_d": self.embed_d, + "embed_n": self.embed_n, + "beta": self.beta, + }) + return config + + + class Train_VQVAE(keras.models.Model): def __init__(self, train_variance, dim=32, embed_n=128, **kwargs): @@ -157,15 +168,23 @@ def call(self, inputs): self.conv.kernel.assign(self.conv.kernel * self.mask) return self.conv(inputs) + def get_config(self): + config = super().get_config() + config.update({ + "mask_type": self.mask_type, + }) + return config + # residual block layer class ResBlock(keras.layers.Layer): def __init__(self, filters, **kwargs): super(ResBlock, self).__init__(**kwargs) - self.conv_1 = keras.layers.Conv2D(filters=filters, kernel_size=1, activation="relu") - self.pixel_conv = PixelConvLayer(mask_type="B", filters=filters // 2, kernel_size=3, + self.filters = filters + self.conv_1 = keras.layers.Conv2D(filters=self.filters, kernel_size=1, activation="relu") + self.pixel_conv = PixelConvLayer(mask_type="B", filters=self.filters // 2, kernel_size=3, activation="relu", padding="same",) - self.conv_2 = keras.layers.Conv2D(filters=filters, kernel_size=1, activation="relu") + self.conv_2 = keras.layers.Conv2D(filters=self.filters, kernel_size=1, activation="relu") def call(self, inputs): conv = self.conv_1(inputs) @@ -173,3 +192,49 @@ def call(self, inputs): conv = self.conv_2(conv) return keras.layers.add([inputs, conv]) + def get_config(self): + config = super().get_config() + config.update({ + "filters" : self.filters + }) + return config + + +def get_pixelcnn(vqvae_trainer, encoded_outputs): + """ + Builds and returns the PixelCNN model. 
+ """ + + # Initialise number of PixelCNN blocks + num_residual_blocks = 2 + num_pixelcnn_layers = 2 + pixelcnn_input_shape = encoded_outputs.shape[1:-1] + print(f"Input shape of the PixelCNN: {pixelcnn_input_shape}") + + # Initialise inputs to PixelCNN + pixelcnn_inputs = keras.Input(shape=pixelcnn_input_shape, dtype=tf.int32) + ohe = tf.one_hot(pixelcnn_inputs, vqvae_trainer.embed_n) + x = PixelConvLayer(mask_type="A", filters=128, kernel_size=7, activation="relu", padding="same")(ohe) + + # Build PixelCNN model + for _ in range(num_residual_blocks): + x = ResBlock(filters=128)(x) + + for _ in range(num_pixelcnn_layers): + x = PixelConvLayer( + mask_type="B", + filters=128, + kernel_size=1, + strides=1, + activation="relu", + padding="valid", + )(x) + + # Outputs from PixelCNN + out = keras.layers.Conv2D(filters=vqvae_trainer.embed_n, kernel_size=1, strides=1, padding="valid")(x) + + pixel_cnn = keras.Model(pixelcnn_inputs, out, name="pixel_cnn") + + return pixel_cnn + + \ No newline at end of file diff --git a/recognition/s4641500/train.py b/recognition/s4641500/train.py index e36e01068e..d88e2ba2c9 100644 --- a/recognition/s4641500/train.py +++ b/recognition/s4641500/train.py @@ -1,77 +1,175 @@ from dataset import * from modules import * -EPOCHS = 20 +EPOCHS = 5 + # initialise and train +train_images, test_images, train_data, test_data, data_variance = load_dataset() + vqvae_trainer = Train_VQVAE(data_variance, dim=16, embed_n=128) vqvae_trainer.compile(optimizer=keras.optimizers.Adam()) -vqvae_trainer.fit(x_train_scaled, epochs=EPOCHS, batch_size=128) +vqvae_history = vqvae_trainer.fit(train_data, epochs=EPOCHS, batch_size=128) -# test trained model -encoder = vqvae_trainer.vqvae.get_layer("encoder") -quantizer = vqvae_trainer.vqvae.get_layer("vector_quantizer") -trained_vqvae_model = vqvae_trainer.vqvae -idx = np.random.choice(len(x_test_scaled), 10) -test_images = x_test_scaled[idx] -reconstructions_test = trained_vqvae_model.predict(test_images) -encoded_outputs = encoder.predict(test_images) -flat_enc_outputs = encoded_outputs.reshape(-1, encoded_outputs.shape[-1]) -codebook_indices = quantizer.get_code_indices(flat_enc_outputs) -codebook_indices = codebook_indices.numpy().reshape(encoded_outputs.shape[:-1]) +#vqvae_trainer.save("vqvae.h5") -num_residual_blocks = 2 -num_pixelcnn_layers = 2 -pixelcnn_input_shape = encoded_outputs.shape[1:-1] -print(f"Input shape of the PixelCNN: {pixelcnn_input_shape}") - -pixelcnn_inputs = keras.Input(shape=pixelcnn_input_shape, dtype=tf.int32) -ohe = tf.one_hot(pixelcnn_inputs, vqvae_trainer.embed_n) -x = PixelConvLayer( - mask_type="A", filters=128, kernel_size=7, activation="relu", padding="same" -)(ohe) - -for _ in range(num_residual_blocks): - x = ResBlock(filters=128)(x) - -for _ in range(num_pixelcnn_layers): - x = PixelConvLayer( - mask_type="B", - filters=128, - kernel_size=1, - strides=1, - activation="relu", - padding="valid", - )(x) - -out = keras.layers.Conv2D( - filters=vqvae_trainer.embed_n, kernel_size=1, strides=1, padding="valid" -)(x) - -pcnn = keras.Model(pixelcnn_inputs, out, name="pixel_cnn") -pcnn.summary() - -# generate the codebook indices. -encoded_outputs = encoder.predict(x_train_scaled) +# Initialise encoder and quantiser +enc = vqvae_trainer.vqvae.get_layer("encoder") +quant = vqvae_trainer.vqvae.get_layer("vector_quantizer") + +# Flatten the encoder outputs. 
+encoded_outputs = enc.predict(train_data) # reduce indices because my VRAM is insufficient encoded_outputs = encoded_outputs[:len(encoded_outputs) // 2] flat_enc_outputs = encoded_outputs.reshape(-1, encoded_outputs.shape[-1]) -codebook_indices = quantizer.get_code_indices(flat_enc_outputs) + +# Generate the codebook indices +codebook_indices = quant.get_code_indices(flat_enc_outputs) codebook_indices = codebook_indices.numpy().reshape(encoded_outputs.shape[:-1]) print(f"Shape of the training data for PixelCNN: {codebook_indices.shape}") -# compile and train pcnn -pcnn.compile( - optimizer=keras.optimizers.Adam(3e-4), + +pixel_cnn = get_pixelcnn(vqvae_trainer, encoded_outputs) +pixel_cnn.summary() + +# Compile the PixelCNN Model +pixel_cnn.compile(optimizer=keras.optimizers.Adam(3e-4), loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), - metrics=["accuracy"], -) -pcnn.fit( - x=codebook_indices, - y=codebook_indices, - batch_size=128, - epochs=EPOCHS, - validation_split=0.2, -) - -pcnn.save("saved model/model") \ No newline at end of file + metrics=["accuracy"],) + +# Train the PixelCNN Model +pixelcnn_history = pixel_cnn.fit(x=codebook_indices, y=codebook_indices, + batch_size=128, epochs=EPOCHS, validation_split=0.2,) + +pixel_cnn.save("pcnn.h5") + +def show_subplot(original, reconstructed): + """ + Displays original and reconstructed image and their SSIM. + Calculates and returns the SSIM between the two images + """ + + # Calculate SSIM + image1 = tf.image.convert_image_dtype(original, tf.float32) + image2 = tf.image.convert_image_dtype(reconstructed, tf.float32) + ssim = tf.image.ssim(image1, image2, max_val=1.0) + plt.suptitle("SSIM: %.2f" %ssim) + + plt.subplot(1, 2, 1) + plt.imshow(original.squeeze() + 0.5, cmap=plt.cm.gray) + plt.title("Original") + plt.axis("off") + + plt.subplot(1, 2, 2) + plt.imshow(reconstructed.squeeze() + 0.5, cmap=plt.cm.gray) + plt.title("Reconstructed") + plt.axis("off") + + plt.show() + return ssim + +############################################################################## +trained_vqvae_model = vqvae_trainer.vqvae +trained_vqvae_model.save("vqvae.h5") + +# Choose 10 random test images +idx = np.random.choice(len(test_data), 10) +test_images = test_data[idx] + +# Perform predictions on test images +reconstructions_test = trained_vqvae_model.predict(test_images) + +# The sum of the SSIM of all resconstructed images +total_ssim = 0.0 + +# Visualise reconstructions +for test_image, reconstructed_image in zip(test_images, reconstructions_test): + ssim = show_subplot(test_image, reconstructed_image) + total_ssim = total_ssim + ssim + + +# Visualise the orignal images and their discrete codes +for i in range(len(test_images)): + + plt.subplot(1, 2, 1) + plt.imshow(test_images[i].squeeze() + 0.5, cmap=plt.cm.gray) + plt.title("Original") + plt.axis("off") + + plt.subplot(1, 2, 2) + plt.imshow(codebook_indices[i], cmap=plt.cm.gray) + plt.title("Code") + plt.axis("off") + plt.show() + +# Plot loss for PixelCNN model + +plt.plot(pixelcnn_history.history['loss']) +plt.plot(pixelcnn_history.history['val_loss']) +plt.title('PixelCNN Loss') +plt.ylabel('loss') +plt.xlabel('epoch') +plt.legend(['Train', 'Validation'], loc='upper left') +plt.show() + +# Plot accuracy for PixelCNN model + +plt.plot(pixelcnn_history.history['accuracy']) +plt.plot(pixelcnn_history.history['val_accuracy']) +plt.title('PixelCNN Accuracy') +plt.ylabel('accuracy') +plt.xlabel('epoch') +plt.legend(['Train', 'Validation'], loc='upper left') +plt.show() + + +def 
generate_priors(pixel_cnn): + """ + Generates and returns the priors using the given PixelCNN model. + """ + + # Create an empty array of priors. + batch = 10 + priors = np.zeros(shape=(batch,) + (pixel_cnn.input_shape)[1:]) + batch, rows, cols = priors.shape + + # Iterate over the priors because generation has to be done sequentially pixel by pixel. + for row in range(rows): + for col in range(cols): + logits = pixel_cnn.predict(priors) + sampler = tfp.distributions.Categorical(logits) + probs = sampler.sample() + priors[:, row, col] = probs[:, row, col] + + return priors + + + # Generate the priors +priors = generate_priors(pixel_cnn) +print(f"Prior shape: {priors.shape}") + + +# Perform an embedding lookup. +pretrained_embeddings = quant.embeds +priors_ohe = tf.one_hot(priors.astype("int32"), vqvae_trainer.embed_n).numpy() +quantized = tf.matmul(priors_ohe.astype("float32"), pretrained_embeddings, transpose_b=True) +quantized = tf.reshape(quantized, (-1, *(encoded_outputs.shape[1:]))) + +# Generate novel images. +decoder = vqvae_trainer.vqvae.get_layer("decoder") +generated_samples = decoder.predict(quantized) + +# Visulaise the novel images generated from discrete codes +for i in range(10): + plt.subplot(1, 2, 1) + plt.imshow(priors[i], cmap = plt.cm.gray) + plt.title("Code") + plt.axis("off") + + plt.subplot(1, 2, 2) + plt.imshow(generated_samples[i].squeeze() + 0.5, cmap = plt.cm.gray) + plt.title("Generated Sample") + plt.axis("off") + plt.show() + + From fc0039982d318b64d5a2b2ddd7bccfd8ea7591d3 Mon Sep 17 00:00:00 2001 From: thomas2001u Date: Thu, 20 Oct 2022 14:19:43 -0700 Subject: [PATCH 08/15] train.py refactor Put many lines of code into functions and made it overall neater. --- recognition/s4641500/modules.py | 17 +-- recognition/s4641500/train.py | 206 +++++++------------------------- 2 files changed, 45 insertions(+), 178 deletions(-) diff --git a/recognition/s4641500/modules.py b/recognition/s4641500/modules.py index e0e51e557d..9699c97164 100644 --- a/recognition/s4641500/modules.py +++ b/recognition/s4641500/modules.py @@ -5,7 +5,6 @@ import tensorflow_probability as tfp import tensorflow as tf - # vector quantizer class class VQ(layers.Layer): def __init__(self, embed_n, embed_d, beta=0.25, **kwargs): @@ -63,9 +62,6 @@ def get_config(self): }) return config - - - class Train_VQVAE(keras.models.Model): def __init__(self, train_variance, dim=32, embed_n=128, **kwargs): super(Train_VQVAE, self).__init__(**kwargs) @@ -111,7 +107,6 @@ def train_step(self, x): "vqvae_loss": self.vq_loss.result(), } - def encoder(dim=16): inputs = keras.Input(shape=(80, 80, 1)) x = layers.Conv2D(32, 3, activation="relu", strides=2, padding="same")( @@ -121,7 +116,6 @@ def encoder(dim=16): out = layers.Conv2D(dim, 1, padding="same")(x) return keras.Model(inputs, out, name="encoder") - def decoder(dim=16): inputs = keras.Input(shape=encoder(dim).output.shape[1:]) x = layers.Conv2DTranspose(64, 3, activation="relu", strides=2, padding="same")( @@ -131,7 +125,6 @@ def decoder(dim=16): out = layers.Conv2DTranspose(1, 3, padding="same")(x) return keras.Model(inputs, out, name="decoder") - def get_vqvae(dim=16, embed_n=64): vq_layer = VQ(embed_n, dim, name="vector_quantizer") enc = encoder(dim) @@ -142,10 +135,6 @@ def get_vqvae(dim=16, embed_n=64): reconstructions = dec(quantized_latents) return keras.Model(inputs, reconstructions, name="vq_vae") - -get_vqvae().summary() - - # builds on the 2D convolutional layer, but includes masking class PixelConvLayer(layers.Layer): def __init__(self, mask_type, 
**kwargs): @@ -175,7 +164,6 @@ def get_config(self): }) return config - # residual block layer class ResBlock(keras.layers.Layer): def __init__(self, filters, **kwargs): @@ -199,7 +187,6 @@ def get_config(self): }) return config - def get_pixelcnn(vqvae_trainer, encoded_outputs): """ Builds and returns the PixelCNN model. @@ -235,6 +222,4 @@ def get_pixelcnn(vqvae_trainer, encoded_outputs): pixel_cnn = keras.Model(pixelcnn_inputs, out, name="pixel_cnn") - return pixel_cnn - - \ No newline at end of file + return pixel_cnn \ No newline at end of file diff --git a/recognition/s4641500/train.py b/recognition/s4641500/train.py index d88e2ba2c9..4751654aab 100644 --- a/recognition/s4641500/train.py +++ b/recognition/s4641500/train.py @@ -4,172 +4,54 @@ EPOCHS = 5 -# initialise and train -train_images, test_images, train_data, test_data, data_variance = load_dataset() - -vqvae_trainer = Train_VQVAE(data_variance, dim=16, embed_n=128) -vqvae_trainer.compile(optimizer=keras.optimizers.Adam()) -vqvae_history = vqvae_trainer.fit(train_data, epochs=EPOCHS, batch_size=128) - -#vqvae_trainer.save("vqvae.h5") - -# Initialise encoder and quantiser -enc = vqvae_trainer.vqvae.get_layer("encoder") -quant = vqvae_trainer.vqvae.get_layer("vector_quantizer") - -# Flatten the encoder outputs. -encoded_outputs = enc.predict(train_data) -# reduce indices because my VRAM is insufficient -encoded_outputs = encoded_outputs[:len(encoded_outputs) // 2] -flat_enc_outputs = encoded_outputs.reshape(-1, encoded_outputs.shape[-1]) - -# Generate the codebook indices -codebook_indices = quant.get_code_indices(flat_enc_outputs) -codebook_indices = codebook_indices.numpy().reshape(encoded_outputs.shape[:-1]) -print(f"Shape of the training data for PixelCNN: {codebook_indices.shape}") - - -pixel_cnn = get_pixelcnn(vqvae_trainer, encoded_outputs) -pixel_cnn.summary() - -# Compile the PixelCNN Model -pixel_cnn.compile(optimizer=keras.optimizers.Adam(3e-4), - loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), - metrics=["accuracy"],) - -# Train the PixelCNN Model -pixelcnn_history = pixel_cnn.fit(x=codebook_indices, y=codebook_indices, - batch_size=128, epochs=EPOCHS, validation_split=0.2,) - -pixel_cnn.save("pcnn.h5") - -def show_subplot(original, reconstructed): - """ - Displays original and reconstructed image and their SSIM. 
- Calculates and returns the SSIM between the two images - """ +def init_encoder_and_quantizer(model): + encoder = model.get_layer("encoder") + quantizer = model.get_layer("vector_quantizer") + return encoder, quantizer + +def flatten_outputs(train_data, encoder): + # flatten the encoder outputs + encoded_outputs = encoder.predict(train_data) + # reduce indices because my VRAM is insufficient + encoded_outputs = encoded_outputs[:len(encoded_outputs) // 2] + flat_enc_outputs = encoded_outputs.reshape(-1, encoded_outputs.shape[-1]) + return encoded_outputs, flat_enc_outputs + +def init_train_vqvae(): + # initialise and train + train_images, test_images, train_data, test_data, data_variance = load_dataset() + + vqvae = Train_VQVAE(data_variance, dim=16, embed_n=128) + vqvae.compile(optimizer=keras.optimizers.Adam()) + vqvae_hist = vqvae.fit(train_data, epochs=EPOCHS, batch_size=128) + + vqvae_model = vqvae.vqvae + vqvae_model.save("vqvae.h5") + + enc, quant = init_encoder_and_quantizer(vqvae_model) - # Calculate SSIM - image1 = tf.image.convert_image_dtype(original, tf.float32) - image2 = tf.image.convert_image_dtype(reconstructed, tf.float32) - ssim = tf.image.ssim(image1, image2, max_val=1.0) - plt.suptitle("SSIM: %.2f" %ssim) - - plt.subplot(1, 2, 1) - plt.imshow(original.squeeze() + 0.5, cmap=plt.cm.gray) - plt.title("Original") - plt.axis("off") - - plt.subplot(1, 2, 2) - plt.imshow(reconstructed.squeeze() + 0.5, cmap=plt.cm.gray) - plt.title("Reconstructed") - plt.axis("off") - - plt.show() - return ssim - -############################################################################## -trained_vqvae_model = vqvae_trainer.vqvae -trained_vqvae_model.save("vqvae.h5") - -# Choose 10 random test images -idx = np.random.choice(len(test_data), 10) -test_images = test_data[idx] - -# Perform predictions on test images -reconstructions_test = trained_vqvae_model.predict(test_images) - -# The sum of the SSIM of all resconstructed images -total_ssim = 0.0 - -# Visualise reconstructions -for test_image, reconstructed_image in zip(test_images, reconstructions_test): - ssim = show_subplot(test_image, reconstructed_image) - total_ssim = total_ssim + ssim - - -# Visualise the orignal images and their discrete codes -for i in range(len(test_images)): - - plt.subplot(1, 2, 1) - plt.imshow(test_images[i].squeeze() + 0.5, cmap=plt.cm.gray) - plt.title("Original") - plt.axis("off") - - plt.subplot(1, 2, 2) - plt.imshow(codebook_indices[i], cmap=plt.cm.gray) - plt.title("Code") - plt.axis("off") - plt.show() - -# Plot loss for PixelCNN model - -plt.plot(pixelcnn_history.history['loss']) -plt.plot(pixelcnn_history.history['val_loss']) -plt.title('PixelCNN Loss') -plt.ylabel('loss') -plt.xlabel('epoch') -plt.legend(['Train', 'Validation'], loc='upper left') -plt.show() - -# Plot accuracy for PixelCNN model - -plt.plot(pixelcnn_history.history['accuracy']) -plt.plot(pixelcnn_history.history['val_accuracy']) -plt.title('PixelCNN Accuracy') -plt.ylabel('accuracy') -plt.xlabel('epoch') -plt.legend(['Train', 'Validation'], loc='upper left') -plt.show() - - -def generate_priors(pixel_cnn): - """ - Generates and returns the priors using the given PixelCNN model. - """ - - # Create an empty array of priors. - batch = 10 - priors = np.zeros(shape=(batch,) + (pixel_cnn.input_shape)[1:]) - batch, rows, cols = priors.shape - - # Iterate over the priors because generation has to be done sequentially pixel by pixel. 
- for row in range(rows): - for col in range(cols): - logits = pixel_cnn.predict(priors) - sampler = tfp.distributions.Categorical(logits) - probs = sampler.sample() - priors[:, row, col] = probs[:, row, col] - - return priors - - - # Generate the priors -priors = generate_priors(pixel_cnn) -print(f"Prior shape: {priors.shape}") - + return flatten_outputs(train_data, enc) + (quant, vqvae) -# Perform an embedding lookup. -pretrained_embeddings = quant.embeds -priors_ohe = tf.one_hot(priors.astype("int32"), vqvae_trainer.embed_n).numpy() -quantized = tf.matmul(priors_ohe.astype("float32"), pretrained_embeddings, transpose_b=True) -quantized = tf.reshape(quantized, (-1, *(encoded_outputs.shape[1:]))) +def init_train_pcnn(encoded_outputs, flat_enc_outputs, quant, vqvae): + # generate the codebook indices + codebook_indices = quant.get_code_indices(flat_enc_outputs) + codebook_indices = codebook_indices.numpy().reshape(encoded_outputs.shape[:-1]) + pcnn = get_pixelcnn(vqvae, encoded_outputs) -# Generate novel images. -decoder = vqvae_trainer.vqvae.get_layer("decoder") -generated_samples = decoder.predict(quantized) + # compile PCNN model + pcnn.compile(optimizer=keras.optimizers.Adam(3e-4), + loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), + metrics=["accuracy"],) -# Visulaise the novel images generated from discrete codes -for i in range(10): - plt.subplot(1, 2, 1) - plt.imshow(priors[i], cmap = plt.cm.gray) - plt.title("Code") - plt.axis("off") + # train PCNN model + pcnn_hist = pcnn.fit(x=codebook_indices, y=codebook_indices, + batch_size=128, epochs=EPOCHS, validation_split=0.2,) - plt.subplot(1, 2, 2) - plt.imshow(generated_samples[i].squeeze() + 0.5, cmap = plt.cm.gray) - plt.title("Generated Sample") - plt.axis("off") - plt.show() + pcnn.save("pcnn.h5") +def main(): + encoded_outputs, flat_enc_outputs, quant, vqvae = init_train_vqvae() + init_train_pcnn(encoded_outputs, flat_enc_outputs, quant, vqvae) +if __name__ == "__main__": + main() From 1b354a37407837e37aa098dc2f2620957ccc0877 Mon Sep 17 00:00:00 2001 From: thomas2001u Date: Thu, 20 Oct 2022 16:40:25 -0700 Subject: [PATCH 09/15] Refactor Made some parameters clearer and neatened up. --- recognition/s4641500/modules.py | 27 +++--- recognition/s4641500/predict.py | 149 ++++++++++++++++++++++++++++++++ 2 files changed, 162 insertions(+), 14 deletions(-) create mode 100644 recognition/s4641500/predict.py diff --git a/recognition/s4641500/modules.py b/recognition/s4641500/modules.py index 9699c97164..318af3079d 100644 --- a/recognition/s4641500/modules.py +++ b/recognition/s4641500/modules.py @@ -126,7 +126,7 @@ def decoder(dim=16): return keras.Model(inputs, out, name="decoder") def get_vqvae(dim=16, embed_n=64): - vq_layer = VQ(embed_n, dim, name="vector_quantizer") + vq_layer = VQ(embed_n, dim, beta=0.25, name="vector_quantizer") enc = encoder(dim) dec = decoder(dim) inputs = keras.Input(shape=(80, 80, 1)) @@ -187,23 +187,23 @@ def get_config(self): }) return config -def get_pixelcnn(vqvae_trainer, encoded_outputs): +def get_pcnn(vqvae, encoded_outputs): """ Builds and returns the PixelCNN model. 
""" - # Initialise number of PixelCNN blocks + # init num of PCNN blocks num_residual_blocks = 2 num_pixelcnn_layers = 2 - pixelcnn_input_shape = encoded_outputs.shape[1:-1] - print(f"Input shape of the PixelCNN: {pixelcnn_input_shape}") + input_shape = encoded_outputs.shape[1:-1] + print(f"Input shape of the PixelCNN: {input_shape}") - # Initialise inputs to PixelCNN - pixelcnn_inputs = keras.Input(shape=pixelcnn_input_shape, dtype=tf.int32) - ohe = tf.one_hot(pixelcnn_inputs, vqvae_trainer.embed_n) + # initialise inputs to PCNN + pcnn_inputs = keras.Input(shape=input_shape, dtype=tf.int32) + ohe = tf.one_hot(pcnn_inputs, vqvae.embed_n) x = PixelConvLayer(mask_type="A", filters=128, kernel_size=7, activation="relu", padding="same")(ohe) - # Build PixelCNN model + # build PCNN model for _ in range(num_residual_blocks): x = ResBlock(filters=128)(x) @@ -217,9 +217,8 @@ def get_pixelcnn(vqvae_trainer, encoded_outputs): padding="valid", )(x) - # Outputs from PixelCNN - out = keras.layers.Conv2D(filters=vqvae_trainer.embed_n, kernel_size=1, strides=1, padding="valid")(x) - - pixel_cnn = keras.Model(pixelcnn_inputs, out, name="pixel_cnn") + # outputs from PCNN + out = keras.layers.Conv2D(filters=vqvae.embed_n, kernel_size=1, strides=1, padding="valid")(x) + pcnn = keras.Model(pcnn_inputs, out, name="pixel_cnn") - return pixel_cnn \ No newline at end of file + return pcnn \ No newline at end of file diff --git a/recognition/s4641500/predict.py b/recognition/s4641500/predict.py new file mode 100644 index 0000000000..7a2e70b072 --- /dev/null +++ b/recognition/s4641500/predict.py @@ -0,0 +1,149 @@ +from dataset import * +from modules import * +import numpy.random as r + +NUM_IMGS = 8 + +def show_subplot(initial, reconstr): + """ + Displays original and reconstructed image and their SSIM. 
+ Calculates and returns the SSIM between the two images + """ + + # find SSIM + image1 = tf.image.convert_image_dtype(initial, tf.float32) + image2 = tf.image.convert_image_dtype(reconstr, tf.float32) + ssim = tf.image.ssim(image1, image2, max_val=1.0) + plt.suptitle("SSIM: %.2f" %ssim) + + plt.subplot(1, 2, 1) + plt.imshow(initial.squeeze() + 0.5, cmap=plt.cm.gray) + plt.title("Original") + plt.axis("off") + + plt.subplot(1, 2, 2) + plt.imshow(reconstr.squeeze() + 0.5, cmap=plt.cm.gray) + plt.title("Reconstructed") + plt.axis("off") + + plt.show() + return ssim + +def reconstruct_images(n): + # select n random test images + randx = np.random.choice(len(test_data), n) + test_imgs = test_data[randx] + + # predictions on test images + reconstructions_test = trained_vqvae_model.predict(test_images) + + # sum of the SSIM of all resconstructed images + total_ssim = 0.0 + + # visualise + for image, reconstructed in zip(test_imgs, reconstructions_test): + ssim = show_subplot(image, reconstructed) + total_ssim = total_ssim + ssim + + return test_imgs + +def visualise_codes(test_imgs): + # visualise the orignal images and their discrete codes + for i in range(len(test_imgs)): + plt.subplot(1, 2, 1) + plt.imshow(test_imgs[i].squeeze() + 0.5, cmap=plt.cm.gray) + plt.title("Original") + plt.axis("off") + + plt.subplot(1, 2, 2) + plt.imshow(codebook_indices[i], cmap=plt.cm.gray) + plt.title("Code") + plt.axis("off") + plt.show() + +def plot_pcnn_loss(pcnn_hist): + # plot loss for PixelCNN model + plt.plot(pcnn_hist.history['loss']) + plt.plot(pcnn_hist.history['val_loss']) + plt.title('PixelCNN Loss') + plt.ylabel('loss') + plt.xlabel('epoch') + plt.legend(['Train', 'Validation'], loc='upper left') + plt.show() + +def plot_pcnn_acc(pcnn_hist): + # plot accuracy for PixelCNN model + plt.plot(pcnn_hist.history['accuracy']) + plt.plot(pcnn_hist.history['val_accuracy']) + plt.title('PixelCNN Accuracy') + plt.ylabel('accuracy') + plt.xlabel('epoch') + plt.legend(['Train', 'Validation'], loc='upper left') + plt.show() + +def prior_gen(pcnn, batch=10): + """Creates and returns priors generated from PCNN model.""" + + priors = np.zeros(shape=(batch,) + (pcnn.input_shape)[1:]) + batch, rows, cols = priors.shape + + # iterate over the priors - must be done one pixel at a time + for row in range(rows): + for col in range(cols): + logits = pcnn.predict(priors) + sampler = tfp.distributions.Categorical(logits) + probs = sampler.sample() + priors[:, row, col] = probs[:, row, col] + + return priors + +def show_novel_imgs(priors, vqvae, quant, encoded_outputs): + # embedding lookup. 
+ pretrained_embeddings = quant.embeds + priors_ohe = tf.one_hot(priors.astype("int32"), vqvae.embed_n).numpy() + quantized = tf.matmul(priors_ohe.astype("float32"), pretrained_embeddings, transpose_b=True) + quantized = tf.reshape(quantized, (-1, *(encoded_outputs.shape[1:]))) + + # generate images + decoder = vqvae.vqvae.get_layer("decoder") + generated_samples = decoder.predict(quantized) + + # visualise images + for i in range(10): + plt.subplot(1, 2, 1) + plt.imshow(priors[i], cmap = plt.cm.gray) + plt.title("Code") + plt.axis("off") + + plt.subplot(1, 2, 2) + plt.imshow(generated_samples[i].squeeze() + 0.5, cmap = plt.cm.gray) + plt.title("Generated Sample") + plt.axis("off") + plt.show() + + +def main(): + train_images, test_images, train_data, test_data, variance = load_dataset() + vqvae = keras.models.load_model("vqvae.h5", custom_objects = {"VQ": VQ}) + pcnn = keras.models.load_model("pcnn.h5", custom_objects = {"PixelConvLayer": PixelConvLayer, "ResBlock": ResBlock}) + image_inds = r.choice(len(test_data), 8) + images = test_data[image_inds] + + n = NUM_IMGS + ssim = reconstruct_images(n) + avg_ssim = ssim / n + + visualise_codes(test_imgs) + #plot_pcnn_loss(pcnn_hist) + #plot_pcnn_acc(pcnn_hist) + + # generate the priors + priors = generate_priors(pcnn) + print(f"Prior shape: {priors.shape}") + + show_novel_imgs(priors, vqvae, quant, encoded_outputs) + +if __name__ == "__main__": + main() + + From 78e4fce49f66f533e2850db4acde0c46583401b3 Mon Sep 17 00:00:00 2001 From: thomas2001u Date: Fri, 21 Oct 2022 05:06:42 -0700 Subject: [PATCH 10/15] Bugfixes Modified the VQ initialiser to allow loading the model from file. Predict.py now implements necessary parameters used to train the model for testing. Train.py has been shortened greatly. --- .gitignore | 19 +++++++- recognition/s4641500/modules.py | 5 ++- recognition/s4641500/predict.py | 44 ++++++++++++------- recognition/s4641500/train.py | 78 ++++++++++----------------------- 4 files changed, 72 insertions(+), 74 deletions(-) diff --git a/.gitignore b/.gitignore index 92459a9d2f..9fc99bc23c 100644 --- a/.gitignore +++ b/.gitignore @@ -129,4 +129,21 @@ dmypy.json .vscode/ # no tracking mypy config file -mypy.ini \ No newline at end of file +mypy.ini +*.png +recognition/s4641500/pirate/driver_script.ipynb +recognition/ISICs_UNet/README.md +recognition/XUE4645768/Readme.md +recognition/s4641500/pirate/yes.py +recognition/s4641500/pirate/train.py +recognition/s4641500/pirate/predict.py +recognition/s4641500/pirate/modules.py +recognition/s4641500/pirate/dataset.py +recognition/ISICs_UNet/README.md +recognition/s4641500/keras_metadata.pb +recognition/s4641500/saved model/model/keras_metadata.pb +recognition/s4641500/saved model/model/saved_model.pb +recognition/s4641500/saved model/model/variables/variables.data-00000-of-00001 +recognition/s4641500/pirate/VQVAE.py +recognition/s4641500/pirate/pixelcnn.py +recognition/ISICs_Unet/README.md diff --git a/recognition/s4641500/modules.py b/recognition/s4641500/modules.py index 318af3079d..bce6f7032e 100644 --- a/recognition/s4641500/modules.py +++ b/recognition/s4641500/modules.py @@ -56,8 +56,8 @@ def get_code_indices(self, flattened): def get_config(self): config = super().get_config() config.update({ - "embed_d": self.embed_d, "embed_n": self.embed_n, + "embed_d": self.embed_d, "beta": self.beta, }) return config @@ -72,6 +72,7 @@ def __init__(self, train_variance, dim=32, embed_n=128, **kwargs): self.total_loss = keras.metrics.Mean(name="total_loss") self.reconstruction_loss = 
keras.metrics.Mean( name="reconstruction_loss" + ) self.vq_loss = keras.metrics.Mean(name="vq_loss") @@ -140,7 +141,7 @@ class PixelConvLayer(layers.Layer): def __init__(self, mask_type, **kwargs): super(PixelConvLayer, self).__init__() self.mask_type = mask_type - self.conv = layers.Conv2D(**kwargs) + self.conv = layers.Conv2D(filters = 128, kernel_size = 3, activation = "relu", padding = "same") def build(self, input): # initialize kernel variables diff --git a/recognition/s4641500/predict.py b/recognition/s4641500/predict.py index 7a2e70b072..84dfd5a1a7 100644 --- a/recognition/s4641500/predict.py +++ b/recognition/s4641500/predict.py @@ -29,13 +29,13 @@ def show_subplot(initial, reconstr): plt.show() return ssim -def reconstruct_images(n): +def reconstruct_images(n, test_data, model): # select n random test images randx = np.random.choice(len(test_data), n) test_imgs = test_data[randx] # predictions on test images - reconstructions_test = trained_vqvae_model.predict(test_images) + reconstructions_test = model.predict(test_imgs) # sum of the SSIM of all resconstructed images total_ssim = 0.0 @@ -47,7 +47,7 @@ def reconstruct_images(n): return test_imgs -def visualise_codes(test_imgs): +def visualise_codes(test_imgs, codebook_indices): # visualise the orignal images and their discrete codes for i in range(len(test_imgs)): plt.subplot(1, 2, 1) @@ -97,13 +97,14 @@ def prior_gen(pcnn, batch=10): return priors -def show_novel_imgs(priors, vqvae, quant, encoded_outputs): +def show_novel_imgs(priors, vqvae, quantizer, encoded_outputs): # embedding lookup. - pretrained_embeddings = quant.embeds - priors_ohe = tf.one_hot(priors.astype("int32"), vqvae.embed_n).numpy() + pretrained_embeddings = quantizer.embeds + priors_ohe = tf.one_hot(priors.astype("int32"), 64).numpy() quantized = tf.matmul(priors_ohe.astype("float32"), pretrained_embeddings, transpose_b=True) quantized = tf.reshape(quantized, (-1, *(encoded_outputs.shape[1:]))) + # generate images decoder = vqvae.vqvae.get_layer("decoder") generated_samples = decoder.predict(quantized) @@ -122,28 +123,37 @@ def show_novel_imgs(priors, vqvae, quant, encoded_outputs): plt.show() + def main(): train_images, test_images, train_data, test_data, variance = load_dataset() vqvae = keras.models.load_model("vqvae.h5", custom_objects = {"VQ": VQ}) - pcnn = keras.models.load_model("pcnn.h5", custom_objects = {"PixelConvLayer": PixelConvLayer, "ResBlock": ResBlock}) image_inds = r.choice(len(test_data), 8) images = test_data[image_inds] n = NUM_IMGS - ssim = reconstruct_images(n) + ssim = reconstruct_images(n, test_data, vqvae) avg_ssim = ssim / n - visualise_codes(test_imgs) - #plot_pcnn_loss(pcnn_hist) - #plot_pcnn_acc(pcnn_hist) + quantizer = vqvae.get_layer("vector_quantizer") + + encoded_outputs = vqvae.get_layer("encoder").predict(test_data) + # reduce indices because my VRAM is insufficient + encoded_outputs = encoded_outputs[:len(encoded_outputs) // 2] + flat_enc_outputs = encoded_outputs.reshape(-1, encoded_outputs.shape[-1]) + + codebook_indices = quantizer.get_code_indices(flat_enc_outputs) + codebook_indices = codebook_indices.numpy().reshape(encoded_outputs.shape[:-1]) + - # generate the priors - priors = generate_priors(pcnn) + visualise_codes(images, codebook_indices) + + pcnn = keras.models.load_model("pcnn.h5", custom_objects = {"PixelConvLayer": PixelConvLayer, "ResBlock": ResBlock}) + + # generate the priors + priors = prior_gen(pcnn, batch=5) print(f"Prior shape: {priors.shape}") - show_novel_imgs(priors, vqvae, quant, encoded_outputs) 
+ show_novel_imgs(priors, vqvae, quantizer, encoded_outputs) if __name__ == "__main__": - main() - - + main() \ No newline at end of file diff --git a/recognition/s4641500/train.py b/recognition/s4641500/train.py index 4751654aab..0f3e09baa7 100644 --- a/recognition/s4641500/train.py +++ b/recognition/s4641500/train.py @@ -1,57 +1,27 @@ from dataset import * from modules import * -EPOCHS = 5 - - -def init_encoder_and_quantizer(model): - encoder = model.get_layer("encoder") - quantizer = model.get_layer("vector_quantizer") - return encoder, quantizer - -def flatten_outputs(train_data, encoder): - # flatten the encoder outputs - encoded_outputs = encoder.predict(train_data) - # reduce indices because my VRAM is insufficient - encoded_outputs = encoded_outputs[:len(encoded_outputs) // 2] - flat_enc_outputs = encoded_outputs.reshape(-1, encoded_outputs.shape[-1]) - return encoded_outputs, flat_enc_outputs - -def init_train_vqvae(): - # initialise and train - train_images, test_images, train_data, test_data, data_variance = load_dataset() - - vqvae = Train_VQVAE(data_variance, dim=16, embed_n=128) - vqvae.compile(optimizer=keras.optimizers.Adam()) - vqvae_hist = vqvae.fit(train_data, epochs=EPOCHS, batch_size=128) - - vqvae_model = vqvae.vqvae - vqvae_model.save("vqvae.h5") - - enc, quant = init_encoder_and_quantizer(vqvae_model) - - return flatten_outputs(train_data, enc) + (quant, vqvae) - -def init_train_pcnn(encoded_outputs, flat_enc_outputs, quant, vqvae): - # generate the codebook indices - codebook_indices = quant.get_code_indices(flat_enc_outputs) - codebook_indices = codebook_indices.numpy().reshape(encoded_outputs.shape[:-1]) - pcnn = get_pixelcnn(vqvae, encoded_outputs) - - # compile PCNN model - pcnn.compile(optimizer=keras.optimizers.Adam(3e-4), - loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), - metrics=["accuracy"],) - - # train PCNN model - pcnn_hist = pcnn.fit(x=codebook_indices, y=codebook_indices, - batch_size=128, epochs=EPOCHS, validation_split=0.2,) - - pcnn.save("pcnn.h5") - -def main(): - encoded_outputs, flat_enc_outputs, quant, vqvae = init_train_vqvae() - init_train_pcnn(encoded_outputs, flat_enc_outputs, quant, vqvae) - -if __name__ == "__main__": - main() +EPOCHS = 1 +BATCH_SIZE = 128 + +train_images, test_images, train_data, test_data, data_variance = load_dataset() +vqvae_trainer = Train_VQVAE(data_variance, dim=16, embed_n=128) +vqvae_trainer.compile(optimizer=keras.optimizers.Adam()) +vqvae_hist = vqvae_trainer.fit(train_data, epochs=EPOCHS, batch_size=128) + +encoded_out = vqvae_trainer.vqvae.get_layer("encoder").predict(test_data) +qtiser = vqvae_trainer.vqvae.get_layer("vector_quantizer") +flat_encs = encoded_out.reshape(-1, encoded_out.shape[-1]) +codebooks = qtiser.get_code_indices(flat_encs) +codebooks = codebooks.numpy().reshape(encoded_out.shape[:-1]) +pixel_cnn = get_pcnn(vqvae_trainer, encoded_out) +pixel_cnn.compile(optimizer=keras.optimizers.Adam(3e-4), + loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), + metrics=["accuracy"],) +pcnn_training = pixel_cnn.fit(x = codebooks, y = codebooks, batch_size = 128, epochs = EPOCHS, validation_split = 0.1) +pixel_cnn.save("pcnn.h5") + + +vqvae = vqvae_trainer.vqvae +vqvae.save("vqvae.h5") +pixel_cnn.save("pcnn.h5") \ No newline at end of file From 8ce3cabc753825c9ef3abfe8c12e38e2dd5eb696 Mon Sep 17 00:00:00 2001 From: thomas2001u Date: Fri, 21 Oct 2022 06:27:53 -0700 Subject: [PATCH 11/15] Update Readme.md Wrote readme. 
--- recognition/XUE4645768/Readme.md | 46 -------------------------------- 1 file changed, 46 deletions(-) diff --git a/recognition/XUE4645768/Readme.md b/recognition/XUE4645768/Readme.md index 94bc1848c0..36250adaa3 100644 --- a/recognition/XUE4645768/Readme.md +++ b/recognition/XUE4645768/Readme.md @@ -53,52 +53,6 @@ python gcn.py Warning: Please pay attention to whether the data path is correct when you run the gcn.py. -# Training - -Learning rate= 0.01 -Weight dacay =0.005 - -For 200 epoches: -```Epoch 000: Loss 0.2894, TrainAcc 0.9126, ValAcc 0.8954 -Epoch 001: Loss 0.2880, TrainAcc 0.9126, ValAcc 0.895 -Epoch 002: Loss 0.2866, TrainAcc 0.9126, ValAcc 0.8961 -Epoch 003: Loss 0.2853, TrainAcc 0.9132, ValAcc 0.8961 -Epoch 004: Loss 0.2839, TrainAcc 0.9137, ValAcc 0.8961 -Epoch 005: Loss 0.2826, TrainAcc 0.9141, ValAcc 0.8963 -Epoch 006: Loss 0.2813, TrainAcc 0.9146, ValAcc 0.8956 -Epoch 007: Loss 0.2800, TrainAcc 0.9146, ValAcc 0.8956 -Epoch 008: Loss 0.2788, TrainAcc 0.9146, ValAcc 0.8959 -Epoch 009: Loss 0.2775, TrainAcc 0.9146, ValAcc 0.8970 -Epoch 010: Loss 0.2763, TrainAcc 0.915, ValAcc 0.8974 -Epoch 011: Loss 0.2751, TrainAcc 0.915, ValAcc 0.8972 -Epoch 012: Loss 0.2739, TrainAcc 0.915, ValAcc 0.8976 -Epoch 013: Loss 0.2727, TrainAcc 0.9157, ValAcc 0.8979 -Epoch 014: Loss 0.2716, TrainAcc 0.9157, ValAcc 0.8983 -Epoch 015: Loss 0.2704, TrainAcc 0.9161, ValAcc 0.8990 -Epoch 016: Loss 0.2693, TrainAcc 0.9168, ValAcc 0.8988 -Epoch 017: Loss 0.2682, TrainAcc 0.9181, ValAcc 0.8990 -Epoch 018: Loss 0.2671, TrainAcc 0.9179, ValAcc 0.8990 -Epoch 019: Loss 0.2660, TrainAcc 0.9179, ValAcc 0.8992 -Epoch 020: Loss 0.2650, TrainAcc 0.9188, ValAcc 0.8996 -...... -Epoch 190: Loss 0.1623, TrainAcc 0.9553, ValAcc 0.9134 -Epoch 191: Loss 0.1619, TrainAcc 0.9555, ValAcc 0.9134 -Epoch 192: Loss 0.1615, TrainAcc 0.9555, ValAcc 0.9132 -Epoch 193: Loss 0.1611, TrainAcc 0.9557, ValAcc 0.9130 -Epoch 194: Loss 0.1607, TrainAcc 0.9562, ValAcc 0.9130 -Epoch 195: Loss 0.1603, TrainAcc 0.9559, ValAcc 0.9130 -Epoch 196: Loss 0.1599, TrainAcc 0.9562, ValAcc 0.9126 -Epoch 197: Loss 0.1595, TrainAcc 0.9562, ValAcc 0.9123 -Epoch 198: Loss 0.1591, TrainAcc 0.9562, ValAcc 0.9123 -Epoch 199: Loss 0.1587, TrainAcc 0.9562, ValAcc 0.9123``` - -For test accuracy:around 0.9 - -# TSNE -For the test:iteration=500, with lower dimension to 2 - - - ```python From e7f6b41efa138e120e275dfbc15c6f56d8d556d7 Mon Sep 17 00:00:00 2001 From: thomas2001u Date: Fri, 21 Oct 2022 06:29:02 -0700 Subject: [PATCH 12/15] Modified training constants Changed epoch number. --- recognition/s4641500/predict.py | 20 +------------------- recognition/s4641500/train.py | 2 +- 2 files changed, 2 insertions(+), 20 deletions(-) diff --git a/recognition/s4641500/predict.py b/recognition/s4641500/predict.py index 84dfd5a1a7..aad05339b4 100644 --- a/recognition/s4641500/predict.py +++ b/recognition/s4641500/predict.py @@ -98,17 +98,6 @@ def prior_gen(pcnn, batch=10): return priors def show_novel_imgs(priors, vqvae, quantizer, encoded_outputs): - # embedding lookup. 
- pretrained_embeddings = quantizer.embeds - priors_ohe = tf.one_hot(priors.astype("int32"), 64).numpy() - quantized = tf.matmul(priors_ohe.astype("float32"), pretrained_embeddings, transpose_b=True) - quantized = tf.reshape(quantized, (-1, *(encoded_outputs.shape[1:]))) - - - # generate images - decoder = vqvae.vqvae.get_layer("decoder") - generated_samples = decoder.predict(quantized) - # visualise images for i in range(10): plt.subplot(1, 2, 1) @@ -116,12 +105,6 @@ def show_novel_imgs(priors, vqvae, quantizer, encoded_outputs): plt.title("Code") plt.axis("off") - plt.subplot(1, 2, 2) - plt.imshow(generated_samples[i].squeeze() + 0.5, cmap = plt.cm.gray) - plt.title("Generated Sample") - plt.axis("off") - plt.show() - def main(): @@ -151,9 +134,8 @@ def main(): # generate the priors priors = prior_gen(pcnn, batch=5) - print(f"Prior shape: {priors.shape}") show_novel_imgs(priors, vqvae, quantizer, encoded_outputs) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/recognition/s4641500/train.py b/recognition/s4641500/train.py index 0f3e09baa7..cb0a439439 100644 --- a/recognition/s4641500/train.py +++ b/recognition/s4641500/train.py @@ -1,7 +1,7 @@ from dataset import * from modules import * -EPOCHS = 1 +EPOCHS = 50 BATCH_SIZE = 128 train_images, test_images, train_data, test_data, data_variance = load_dataset() From 6ad5060c7ed2690b73c322ca60487f6c223b5933 Mon Sep 17 00:00:00 2001 From: thomas2001u Date: Fri, 21 Oct 2022 06:36:28 -0700 Subject: [PATCH 13/15] Revert "Update Readme.md" This reverts commit 8ce3cabc753825c9ef3abfe8c12e38e2dd5eb696. --- recognition/XUE4645768/Readme.md | 46 ++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/recognition/XUE4645768/Readme.md b/recognition/XUE4645768/Readme.md index 36250adaa3..94bc1848c0 100644 --- a/recognition/XUE4645768/Readme.md +++ b/recognition/XUE4645768/Readme.md @@ -53,6 +53,52 @@ python gcn.py Warning: Please pay attention to whether the data path is correct when you run the gcn.py. +# Training + +Learning rate= 0.01 +Weight dacay =0.005 + +For 200 epoches: +```Epoch 000: Loss 0.2894, TrainAcc 0.9126, ValAcc 0.8954 +Epoch 001: Loss 0.2880, TrainAcc 0.9126, ValAcc 0.895 +Epoch 002: Loss 0.2866, TrainAcc 0.9126, ValAcc 0.8961 +Epoch 003: Loss 0.2853, TrainAcc 0.9132, ValAcc 0.8961 +Epoch 004: Loss 0.2839, TrainAcc 0.9137, ValAcc 0.8961 +Epoch 005: Loss 0.2826, TrainAcc 0.9141, ValAcc 0.8963 +Epoch 006: Loss 0.2813, TrainAcc 0.9146, ValAcc 0.8956 +Epoch 007: Loss 0.2800, TrainAcc 0.9146, ValAcc 0.8956 +Epoch 008: Loss 0.2788, TrainAcc 0.9146, ValAcc 0.8959 +Epoch 009: Loss 0.2775, TrainAcc 0.9146, ValAcc 0.8970 +Epoch 010: Loss 0.2763, TrainAcc 0.915, ValAcc 0.8974 +Epoch 011: Loss 0.2751, TrainAcc 0.915, ValAcc 0.8972 +Epoch 012: Loss 0.2739, TrainAcc 0.915, ValAcc 0.8976 +Epoch 013: Loss 0.2727, TrainAcc 0.9157, ValAcc 0.8979 +Epoch 014: Loss 0.2716, TrainAcc 0.9157, ValAcc 0.8983 +Epoch 015: Loss 0.2704, TrainAcc 0.9161, ValAcc 0.8990 +Epoch 016: Loss 0.2693, TrainAcc 0.9168, ValAcc 0.8988 +Epoch 017: Loss 0.2682, TrainAcc 0.9181, ValAcc 0.8990 +Epoch 018: Loss 0.2671, TrainAcc 0.9179, ValAcc 0.8990 +Epoch 019: Loss 0.2660, TrainAcc 0.9179, ValAcc 0.8992 +Epoch 020: Loss 0.2650, TrainAcc 0.9188, ValAcc 0.8996 +...... 
+Epoch 190: Loss 0.1623, TrainAcc 0.9553, ValAcc 0.9134 +Epoch 191: Loss 0.1619, TrainAcc 0.9555, ValAcc 0.9134 +Epoch 192: Loss 0.1615, TrainAcc 0.9555, ValAcc 0.9132 +Epoch 193: Loss 0.1611, TrainAcc 0.9557, ValAcc 0.9130 +Epoch 194: Loss 0.1607, TrainAcc 0.9562, ValAcc 0.9130 +Epoch 195: Loss 0.1603, TrainAcc 0.9559, ValAcc 0.9130 +Epoch 196: Loss 0.1599, TrainAcc 0.9562, ValAcc 0.9126 +Epoch 197: Loss 0.1595, TrainAcc 0.9562, ValAcc 0.9123 +Epoch 198: Loss 0.1591, TrainAcc 0.9562, ValAcc 0.9123 +Epoch 199: Loss 0.1587, TrainAcc 0.9562, ValAcc 0.9123``` + +For test accuracy:around 0.9 + +# TSNE +For the test:iteration=500, with lower dimension to 2 + + + ```python From 9c3a8cf3482c01bdebe0714a6ae3c60cd4453697 Mon Sep 17 00:00:00 2001 From: thomas2001u Date: Fri, 21 Oct 2022 06:36:57 -0700 Subject: [PATCH 14/15] Undo --- recognition/XUE4645768/README.md | 46 ++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/recognition/XUE4645768/README.md b/recognition/XUE4645768/README.md index 36250adaa3..94bc1848c0 100644 --- a/recognition/XUE4645768/README.md +++ b/recognition/XUE4645768/README.md @@ -53,6 +53,52 @@ python gcn.py Warning: Please pay attention to whether the data path is correct when you run the gcn.py. +# Training + +Learning rate= 0.01 +Weight dacay =0.005 + +For 200 epoches: +```Epoch 000: Loss 0.2894, TrainAcc 0.9126, ValAcc 0.8954 +Epoch 001: Loss 0.2880, TrainAcc 0.9126, ValAcc 0.895 +Epoch 002: Loss 0.2866, TrainAcc 0.9126, ValAcc 0.8961 +Epoch 003: Loss 0.2853, TrainAcc 0.9132, ValAcc 0.8961 +Epoch 004: Loss 0.2839, TrainAcc 0.9137, ValAcc 0.8961 +Epoch 005: Loss 0.2826, TrainAcc 0.9141, ValAcc 0.8963 +Epoch 006: Loss 0.2813, TrainAcc 0.9146, ValAcc 0.8956 +Epoch 007: Loss 0.2800, TrainAcc 0.9146, ValAcc 0.8956 +Epoch 008: Loss 0.2788, TrainAcc 0.9146, ValAcc 0.8959 +Epoch 009: Loss 0.2775, TrainAcc 0.9146, ValAcc 0.8970 +Epoch 010: Loss 0.2763, TrainAcc 0.915, ValAcc 0.8974 +Epoch 011: Loss 0.2751, TrainAcc 0.915, ValAcc 0.8972 +Epoch 012: Loss 0.2739, TrainAcc 0.915, ValAcc 0.8976 +Epoch 013: Loss 0.2727, TrainAcc 0.9157, ValAcc 0.8979 +Epoch 014: Loss 0.2716, TrainAcc 0.9157, ValAcc 0.8983 +Epoch 015: Loss 0.2704, TrainAcc 0.9161, ValAcc 0.8990 +Epoch 016: Loss 0.2693, TrainAcc 0.9168, ValAcc 0.8988 +Epoch 017: Loss 0.2682, TrainAcc 0.9181, ValAcc 0.8990 +Epoch 018: Loss 0.2671, TrainAcc 0.9179, ValAcc 0.8990 +Epoch 019: Loss 0.2660, TrainAcc 0.9179, ValAcc 0.8992 +Epoch 020: Loss 0.2650, TrainAcc 0.9188, ValAcc 0.8996 +...... 
+Epoch 190: Loss 0.1623, TrainAcc 0.9553, ValAcc 0.9134
+Epoch 191: Loss 0.1619, TrainAcc 0.9555, ValAcc 0.9134
+Epoch 192: Loss 0.1615, TrainAcc 0.9555, ValAcc 0.9132
+Epoch 193: Loss 0.1611, TrainAcc 0.9557, ValAcc 0.9130
+Epoch 194: Loss 0.1607, TrainAcc 0.9562, ValAcc 0.9130
+Epoch 195: Loss 0.1603, TrainAcc 0.9559, ValAcc 0.9130
+Epoch 196: Loss 0.1599, TrainAcc 0.9562, ValAcc 0.9126
+Epoch 197: Loss 0.1595, TrainAcc 0.9562, ValAcc 0.9123
+Epoch 198: Loss 0.1591, TrainAcc 0.9562, ValAcc 0.9123
+Epoch 199: Loss 0.1587, TrainAcc 0.9562, ValAcc 0.9123```
+
+For test accuracy:around 0.9
+
+# TSNE
+For the test:iteration=500, with lower dimension to 2
+
+
+
 ```python

From 054f3bcd9ae48c5c1f491bb7030f7b9a8c5a8ff4 Mon Sep 17 00:00:00 2001
From: thomas2001u
Date: Fri, 21 Oct 2022 06:37:19 -0700
Subject: [PATCH 15/15] Create README.MD

---
 recognition/s4641500/README.MD | 64 ++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)
 create mode 100644 recognition/s4641500/README.MD

diff --git a/recognition/s4641500/README.MD b/recognition/s4641500/README.MD
new file mode 100644
index 0000000000..7489398309
--- /dev/null
+++ b/recognition/s4641500/README.MD
@@ -0,0 +1,64 @@
+# OASIS Vector-Quantized Variational Autoencoder
+This is an algorithm for reconstructing OASIS brain images using a VQ-VAE. The main benefit of a VQ-VAE over a standard variational autoencoder is the reduced time needed to train and evaluate the model. With this VQ-VAE it is possible to encode and generate new 2D slices of human brains, which is especially useful for detecting physical anomalies: because the VQ-VAE is trained to learn only the most important features of brains, the reconstruction of a diseased or damaged brain would differ noticeably from the input (i.e. look more "normal"). This makes it possible to quickly identify and classify physical issues within the brain and supports swift diagnosis.
+
+![](vae.png)
+(Image from [lucas-bechberger.de](https://lucas-bechberger.de/2018/12/07/what-is-a-%CE%B2-variational-autoencoder/))
+
+Vector-Quantized Variational Autoencoders obtain a discrete latent representation through vector quantization. Functionally, they differ from standard VAEs in that the encoder outputs discrete rather than continuous codes, and the prior over those codes is learned rather than fixed. Quantization also lets the model avoid posterior collapse, where the latents are ignored once they are paired with a powerful autoregressive decoder, a failure mode typically observed in the standard VAE framework. Combining the discrete representations with a learned prior allows the model to generate high-fidelity output.
+
+## Dependencies
+This code was written and tested with tensorflow 2.10.0, tensorflow-probability 0.18.0, numpy 1.23.3, Pillow 9.2.0 for images, matplotlib 3.6.1 for plots and keras 2.10.0 for models.
+
+The program was developed on Windows inside a conda environment with cudatoolkit 11.2.2 and cudnn 8.1.0.77 installed.
+
+## Results
+![](example-reconstruction.png)
+
+As we can see, the priors constructed from an input after training can be used to recreate what the VQ-VAE "thinks a brain should look like" from its codebook indices alone. Because this impression of a brain is learned from 2D image slices of healthy or unhealthy brains, an unhealthy brain can be reconstructed to look healthy (or a healthy one to look unhealthy), and the difference between input and reconstruction can help flag potential problems in a medical setting.
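+
+## Generating new samples
+The snippet below is a rough, illustrative sketch (not the exact project code) of how the models trained in train.py could be used to sample novel slices, wrapped in a hypothetical generate_samples helper. It assumes the "vector_quantizer" and "decoder" layer names and the `embeds` codebook variable defined in modules.py, and a PixelCNN that takes a 2D grid of codebook indices and returns per-position logits over the codebook, as trained above.
+
+```python
+import numpy as np
+import tensorflow as tf
+import tensorflow_probability as tfp
+import matplotlib.pyplot as plt
+
+
+def generate_samples(vqvae, pixel_cnn, n=4):
+    """Sample codebook indices from the PixelCNN prior and decode them to images."""
+    quantizer = vqvae.get_layer("vector_quantizer")
+    decoder = vqvae.get_layer("decoder")
+
+    # Fill an empty grid of codebook indices autoregressively, one position at a time.
+    rows, cols = pixel_cnn.input_shape[1:]
+    priors = np.zeros(shape=(n, rows, cols), dtype="int32")
+    for r in range(rows):
+        for c in range(cols):
+            logits = pixel_cnn.predict(priors, verbose=0)[:, r, c, :]
+            priors[:, r, c] = tfp.distributions.Categorical(logits=logits).sample().numpy()
+
+    # Look up the codebook vector for each sampled index, mirroring the quantizer.
+    embeddings = quantizer.embeds  # shape (embed_d, embed_n)
+    one_hot = tf.one_hot(priors, embeddings.shape[1])
+    flat = tf.reshape(one_hot, [-1, embeddings.shape[1]])
+    quantized = tf.matmul(flat, embeddings, transpose_b=True)
+    quantized = tf.reshape(quantized, (n, rows, cols, embeddings.shape[0]))
+
+    # Decode the quantized latents back into image space and display them.
+    generated = decoder.predict(quantized)
+    for i in range(n):
+        plt.subplot(1, 2, 1)
+        plt.imshow(priors[i], cmap="gray")
+        plt.title("Sampled code")
+        plt.axis("off")
+        plt.subplot(1, 2, 2)
+        plt.imshow(generated[i].squeeze() + 0.5, cmap="gray")
+        plt.title("Generated sample")
+        plt.axis("off")
+        plt.show()
+    return generated
+```
+
+For example, straight after running train.py, calling `generate_samples(vqvae_trainer.vqvae, pixel_cnn)` should produce a handful of novel slices alongside the codebook grids they were decoded from. The nested predict loop is slow but keeps the sketch simple; sampling in larger batches would be a straightforward optimisation.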