diff --git a/.gitignore b/.gitignore
index 6e10f0b1b8..fc1e39bc51 100644
--- a/.gitignore
+++ b/.gitignore
@@ -132,4 +132,4 @@ dmypy.json
 .idea/
 
 # no tracking mypy config file
-mypy.ini
+mypy.ini
\ No newline at end of file
diff --git a/recognition/XUE4645768/README.md b/recognition/XUE4645768/README.md
index 36250adaa3..94bc1848c0 100644
--- a/recognition/XUE4645768/README.md
+++ b/recognition/XUE4645768/README.md
@@ -53,6 +53,52 @@ python gcn.py
 
 Warning: Please pay attention to whether the data path is correct when you run the gcn.py.
 
+# Training
+
+Learning rate = 0.01
+Weight decay = 0.005
+
+For 200 epochs:
+```
+Epoch 000: Loss 0.2894, TrainAcc 0.9126, ValAcc 0.8954
+Epoch 001: Loss 0.2880, TrainAcc 0.9126, ValAcc 0.895
+Epoch 002: Loss 0.2866, TrainAcc 0.9126, ValAcc 0.8961
+Epoch 003: Loss 0.2853, TrainAcc 0.9132, ValAcc 0.8961
+Epoch 004: Loss 0.2839, TrainAcc 0.9137, ValAcc 0.8961
+Epoch 005: Loss 0.2826, TrainAcc 0.9141, ValAcc 0.8963
+Epoch 006: Loss 0.2813, TrainAcc 0.9146, ValAcc 0.8956
+Epoch 007: Loss 0.2800, TrainAcc 0.9146, ValAcc 0.8956
+Epoch 008: Loss 0.2788, TrainAcc 0.9146, ValAcc 0.8959
+Epoch 009: Loss 0.2775, TrainAcc 0.9146, ValAcc 0.8970
+Epoch 010: Loss 0.2763, TrainAcc 0.915, ValAcc 0.8974
+Epoch 011: Loss 0.2751, TrainAcc 0.915, ValAcc 0.8972
+Epoch 012: Loss 0.2739, TrainAcc 0.915, ValAcc 0.8976
+Epoch 013: Loss 0.2727, TrainAcc 0.9157, ValAcc 0.8979
+Epoch 014: Loss 0.2716, TrainAcc 0.9157, ValAcc 0.8983
+Epoch 015: Loss 0.2704, TrainAcc 0.9161, ValAcc 0.8990
+Epoch 016: Loss 0.2693, TrainAcc 0.9168, ValAcc 0.8988
+Epoch 017: Loss 0.2682, TrainAcc 0.9181, ValAcc 0.8990
+Epoch 018: Loss 0.2671, TrainAcc 0.9179, ValAcc 0.8990
+Epoch 019: Loss 0.2660, TrainAcc 0.9179, ValAcc 0.8992
+Epoch 020: Loss 0.2650, TrainAcc 0.9188, ValAcc 0.8996
+......
+Epoch 190: Loss 0.1623, TrainAcc 0.9553, ValAcc 0.9134
+Epoch 191: Loss 0.1619, TrainAcc 0.9555, ValAcc 0.9134
+Epoch 192: Loss 0.1615, TrainAcc 0.9555, ValAcc 0.9132
+Epoch 193: Loss 0.1611, TrainAcc 0.9557, ValAcc 0.9130
+Epoch 194: Loss 0.1607, TrainAcc 0.9562, ValAcc 0.9130
+Epoch 195: Loss 0.1603, TrainAcc 0.9559, ValAcc 0.9130
+Epoch 196: Loss 0.1599, TrainAcc 0.9562, ValAcc 0.9126
+Epoch 197: Loss 0.1595, TrainAcc 0.9562, ValAcc 0.9123
+Epoch 198: Loss 0.1591, TrainAcc 0.9562, ValAcc 0.9123
+Epoch 199: Loss 0.1587, TrainAcc 0.9562, ValAcc 0.9123
+```
+
+Test accuracy: around 0.9
+
+# TSNE
+For the test set, t-SNE is run for 500 iterations, reducing the node embeddings to 2 dimensions.
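These settings correspond roughly to the sketch below. It is illustrative only and not the project's code: `node_embeddings` and `labels` are placeholder data standing in for the trained GCN's node embeddings and node classes.

```python
# Illustrative t-SNE visualisation matching the settings above:
# 2 output dimensions, 500 iterations.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

node_embeddings = np.random.rand(500, 64)    # placeholder for the GCN node embeddings
labels = np.random.randint(0, 4, size=500)   # placeholder for the node classes

reduced = TSNE(n_components=2, n_iter=500).fit_transform(node_embeddings)

plt.scatter(reduced[:, 0], reduced[:, 1], c=labels, s=4, cmap="tab10")
plt.title("t-SNE of GCN embeddings")
plt.show()
```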
+
+
+
 
 ```python
diff --git a/recognition/s4641500/README.MD b/recognition/s4641500/README.MD
new file mode 100644
index 0000000000..7489398309
--- /dev/null
+++ b/recognition/s4641500/README.MD
@@ -0,0 +1,17 @@
+# OASIS Vector-Quantized Variational Autoencoder
+This is an algorithm for reconstructing OASIS brain images utilising a VQ-VAE. The benefit of using a VQ-VAE over a standard variational autoencoder is the reduction in the time taken to train and evaluate the model. Using this VQ-VAE it is possible to encode and generate new 2D slices of human brains, which is especially meaningful for detecting physical anomalies. Because the VQ-VAE is trained to learn only the most important features of brains, a reconstruction of a diseased or damaged brain image would differ greatly from the input (i.e., look more "normal"). This makes it useful for the quick identification and classification of physical issues within the human brain and for swift diagnosis of concerns.
+
+![](vae.png)
+(Image from [lucas-bechberger.de](https://lucas-bechberger.de/2018/12/07/what-is-a-%CE%B2-variational-autoencoder/))
+
+Vector-Quantized Variational Autoencoders use vector quantization to obtain a discrete latent representation. Functionally, they differ from standard VAEs in that the encoder outputs discrete rather than continuous codes, and the prior over those codes is learned rather than fixed. Vector quantization also lets the model circumvent posterior collapse - where the latents are ignored when paired with a powerful autoregressive decoder - which is often observed in the VAE framework. Pairing these discrete representations with a learned prior, the model can generate high-fidelity output.
+
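The quantization step itself is compact enough to sketch. The snippet below is an illustrative sketch only, mirroring the `VQ` layer in `modules.py` further down in this diff (where the codebook is stored as a `(d, n)` matrix): each encoder output vector is snapped to its nearest codebook entry, and the straight-through trick copies gradients past the non-differentiable lookup.

```python
import tensorflow as tf

def quantize(z_e, codebook):
    """z_e: encoder outputs (..., d); codebook: (d, n) matrix of n codes."""
    flat = tf.reshape(z_e, [-1, codebook.shape[0]])
    # squared L2 distance from every latent vector to every codebook entry
    dists = (
        tf.reduce_sum(flat ** 2, axis=1, keepdims=True)
        + tf.reduce_sum(codebook ** 2, axis=0)
        - 2 * tf.matmul(flat, codebook)
    )
    codes = tf.argmin(dists, axis=1)  # discrete latent codes
    z_q = tf.matmul(tf.one_hot(codes, codebook.shape[1]), codebook, transpose_b=True)
    z_q = tf.reshape(z_q, tf.shape(z_e))
    # straight-through estimator: use z_q forward, copy gradients back to z_e
    return z_e + tf.stop_gradient(z_q - z_e), codes
```

During training, `modules.py` adds the commitment and codebook losses on top of this lookup, and the PixelCNN defined there learns the prior over the resulting grids of `codes`.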
+## Dependencies
+This code was written and tested using tensorflow 2.10.0, tensorflow-probability 0.18.0, numpy 1.23.3, Pillow 9.2.0 for images, matplotlib 3.6.1 for plots and keras 2.10.0 for models.
+
+The program was written within a conda environment with cudatoolkit 11.2.2 and cudnn 8.1.0.77 installed, on a Windows system.
+
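Before training, it can be worth confirming that the active environment matches these versions (an illustrative check only; the expected values are simply the versions listed above):

```python
# Illustrative environment check for the versions listed above.
import tensorflow as tf
import tensorflow_probability as tfp

print(tf.__version__)   # expected: 2.10.0
print(tfp.__version__)  # expected: 0.18.0
# should list a GPU if cudatoolkit 11.2.2 / cudnn 8.1.0.77 are set up correctly
print(tf.config.list_physical_devices("GPU"))
```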
+## Results
+![](example-reconstruction.png)
+
+As we can see, the discrete codes constructed from an input after training can be used to recreate what the VQ-VAE "thinks a brain should look like". This impression of what a brain looks like can be learned from 2D image slices of healthy or unhealthy brains, allowing reconstruction in such a way that an unhealthy brain can be reconstructed to look healthy or a healthy brain to look unhealthy - and this can be used to assist detection of potential problems in medical settings.
\ No newline at end of file
diff --git a/recognition/s4641500/dataset.py b/recognition/s4641500/dataset.py
new file mode 100644
index 0000000000..e190cae9d9
--- /dev/null
+++ b/recognition/s4641500/dataset.py
@@ -0,0 +1,47 @@
+from modules import *
+import glob
+import os
+import numpy as np
+from PIL import Image
+
+# data directories
+TR_PATH = "keras_png_slices_data/keras_png_slices_train"
+TST_PATH = "keras_png_slices_data/keras_png_slices_test"
+V_PATH = "keras_png_slices_data/keras_png_slices_validate"
+
+# data dimensions
+IMG_H = 80
+IMG_W = 80
+
+
+def load_images(p, filenames):
+    """
+    Returns a list of resized (IMG_H x IMG_W x 1) images for the given
+    filenames located under the path p.
+    """
+    images = []
+
+    for file in filenames:
+        image = Image.open(p + '/' + file)
+        image = image.resize((IMG_H, IMG_W))
+        image = np.reshape(image, (IMG_H, IMG_W, 1))
+        images.append(image)
+    return images
+
+
+def load_dataset():
+    train_files = os.listdir(TR_PATH)
+    test_files = os.listdir(TST_PATH)
+    validation_files = os.listdir(V_PATH)  # not currently used
+
+    # load images from path
+    train_imgs = load_images(TR_PATH, train_files)
+    test_imgs = load_images(TST_PATH, test_files)
+    x_train = np.array(train_imgs)
+    x_test = np.array(test_imgs)
+
+    # normalise data to [-0.5, 0.5]
+    x_train_scaled = (x_train / 255.0) - 0.5
+    x_test_scaled = (x_test / 255.0) - 0.5
+
+    # get variance for scaling the mse reconstruction loss
+    data_variance = np.var(x_train / 255.0)
+    return (train_imgs, test_imgs, x_train_scaled, x_test_scaled, data_variance)
\ No newline at end of file
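For orientation, this is roughly how the tuple returned by `load_dataset()` is consumed downstream (a usage sketch based on `train.py` at the end of this diff; it assumes the OASIS PNG slices are present at the paths above):

```python
# Usage sketch for dataset.load_dataset(), mirroring train.py below.
from dataset import load_dataset

train_imgs, test_imgs, x_train_scaled, x_test_scaled, data_variance = load_dataset()

print(x_train_scaled.shape)  # (num_train_images, 80, 80, 1), values in [-0.5, 0.5]
print(x_test_scaled.shape)   # (num_test_images, 80, 80, 1)
print(data_variance)         # variance of the unscaled training data, used to weight the MSE
```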
+ """ + + # init num of PCNN blocks + num_residual_blocks = 2 + num_pixelcnn_layers = 2 + input_shape = encoded_outputs.shape[1:-1] + print(f"Input shape of the PixelCNN: {input_shape}") + + # initialise inputs to PCNN + pcnn_inputs = keras.Input(shape=input_shape, dtype=tf.int32) + ohe = tf.one_hot(pcnn_inputs, vqvae.embed_n) + x = PixelConvLayer(mask_type="A", filters=128, kernel_size=7, activation="relu", padding="same")(ohe) + + # build PCNN model + for _ in range(num_residual_blocks): + x = ResBlock(filters=128)(x) + + for _ in range(num_pixelcnn_layers): + x = PixelConvLayer( + mask_type="B", + filters=128, + kernel_size=1, + strides=1, + activation="relu", + padding="valid", + )(x) + + # outputs from PCNN + out = keras.layers.Conv2D(filters=vqvae.embed_n, kernel_size=1, strides=1, padding="valid")(x) + pcnn = keras.Model(pcnn_inputs, out, name="pixel_cnn") + + return pcnn \ No newline at end of file diff --git a/recognition/s4641500/predict.py b/recognition/s4641500/predict.py new file mode 100644 index 0000000000..aad05339b4 --- /dev/null +++ b/recognition/s4641500/predict.py @@ -0,0 +1,141 @@ +from dataset import * +from modules import * +import numpy.random as r + +NUM_IMGS = 8 + +def show_subplot(initial, reconstr): + """ + Displays original and reconstructed image and their SSIM. + Calculates and returns the SSIM between the two images + """ + + # find SSIM + image1 = tf.image.convert_image_dtype(initial, tf.float32) + image2 = tf.image.convert_image_dtype(reconstr, tf.float32) + ssim = tf.image.ssim(image1, image2, max_val=1.0) + plt.suptitle("SSIM: %.2f" %ssim) + + plt.subplot(1, 2, 1) + plt.imshow(initial.squeeze() + 0.5, cmap=plt.cm.gray) + plt.title("Original") + plt.axis("off") + + plt.subplot(1, 2, 2) + plt.imshow(reconstr.squeeze() + 0.5, cmap=plt.cm.gray) + plt.title("Reconstructed") + plt.axis("off") + + plt.show() + return ssim + +def reconstruct_images(n, test_data, model): + # select n random test images + randx = np.random.choice(len(test_data), n) + test_imgs = test_data[randx] + + # predictions on test images + reconstructions_test = model.predict(test_imgs) + + # sum of the SSIM of all resconstructed images + total_ssim = 0.0 + + # visualise + for image, reconstructed in zip(test_imgs, reconstructions_test): + ssim = show_subplot(image, reconstructed) + total_ssim = total_ssim + ssim + + return test_imgs + +def visualise_codes(test_imgs, codebook_indices): + # visualise the orignal images and their discrete codes + for i in range(len(test_imgs)): + plt.subplot(1, 2, 1) + plt.imshow(test_imgs[i].squeeze() + 0.5, cmap=plt.cm.gray) + plt.title("Original") + plt.axis("off") + + plt.subplot(1, 2, 2) + plt.imshow(codebook_indices[i], cmap=plt.cm.gray) + plt.title("Code") + plt.axis("off") + plt.show() + +def plot_pcnn_loss(pcnn_hist): + # plot loss for PixelCNN model + plt.plot(pcnn_hist.history['loss']) + plt.plot(pcnn_hist.history['val_loss']) + plt.title('PixelCNN Loss') + plt.ylabel('loss') + plt.xlabel('epoch') + plt.legend(['Train', 'Validation'], loc='upper left') + plt.show() + +def plot_pcnn_acc(pcnn_hist): + # plot accuracy for PixelCNN model + plt.plot(pcnn_hist.history['accuracy']) + plt.plot(pcnn_hist.history['val_accuracy']) + plt.title('PixelCNN Accuracy') + plt.ylabel('accuracy') + plt.xlabel('epoch') + plt.legend(['Train', 'Validation'], loc='upper left') + plt.show() + +def prior_gen(pcnn, batch=10): + """Creates and returns priors generated from PCNN model.""" + + priors = np.zeros(shape=(batch,) + (pcnn.input_shape)[1:]) + batch, rows, cols = 
diff --git a/recognition/s4641500/predict.py b/recognition/s4641500/predict.py
new file mode 100644
index 0000000000..aad05339b4
--- /dev/null
+++ b/recognition/s4641500/predict.py
@@ -0,0 +1,141 @@
+from dataset import *
+from modules import *
+import numpy.random as r
+
+NUM_IMGS = 8
+
+def show_subplot(initial, reconstr):
+    """
+    Displays an original image next to its reconstruction and returns the
+    SSIM between the two.
+    """
+
+    # find SSIM
+    image1 = tf.image.convert_image_dtype(initial, tf.float32)
+    image2 = tf.image.convert_image_dtype(reconstr, tf.float32)
+    ssim = tf.image.ssim(image1, image2, max_val=1.0)
+    plt.suptitle("SSIM: %.2f" % ssim)
+
+    plt.subplot(1, 2, 1)
+    plt.imshow(initial.squeeze() + 0.5, cmap=plt.cm.gray)
+    plt.title("Original")
+    plt.axis("off")
+
+    plt.subplot(1, 2, 2)
+    plt.imshow(reconstr.squeeze() + 0.5, cmap=plt.cm.gray)
+    plt.title("Reconstructed")
+    plt.axis("off")
+
+    plt.show()
+    return ssim
+
+def reconstruct_images(n, test_data, model):
+    """
+    Reconstructs n random test images, shows each next to its original and
+    returns the sum of their SSIMs.
+    """
+    # select n random test images
+    randx = np.random.choice(len(test_data), n)
+    test_imgs = test_data[randx]
+
+    # predictions on test images
+    reconstructions_test = model.predict(test_imgs)
+
+    # sum of the SSIM of all reconstructed images
+    total_ssim = 0.0
+
+    # visualise
+    for image, reconstructed in zip(test_imgs, reconstructions_test):
+        ssim = show_subplot(image, reconstructed)
+        total_ssim = total_ssim + ssim
+
+    return total_ssim
+
+def visualise_codes(test_imgs, codebook_indices):
+    # visualise the original images and their discrete codes
+    for i in range(len(test_imgs)):
+        plt.subplot(1, 2, 1)
+        plt.imshow(test_imgs[i].squeeze() + 0.5, cmap=plt.cm.gray)
+        plt.title("Original")
+        plt.axis("off")
+
+        plt.subplot(1, 2, 2)
+        plt.imshow(codebook_indices[i], cmap=plt.cm.gray)
+        plt.title("Code")
+        plt.axis("off")
+        plt.show()
+
+def plot_pcnn_loss(pcnn_hist):
+    # plot loss for PixelCNN model
+    plt.plot(pcnn_hist.history['loss'])
+    plt.plot(pcnn_hist.history['val_loss'])
+    plt.title('PixelCNN Loss')
+    plt.ylabel('loss')
+    plt.xlabel('epoch')
+    plt.legend(['Train', 'Validation'], loc='upper left')
+    plt.show()
+
+def plot_pcnn_acc(pcnn_hist):
+    # plot accuracy for PixelCNN model
+    plt.plot(pcnn_hist.history['accuracy'])
+    plt.plot(pcnn_hist.history['val_accuracy'])
+    plt.title('PixelCNN Accuracy')
+    plt.ylabel('accuracy')
+    plt.xlabel('epoch')
+    plt.legend(['Train', 'Validation'], loc='upper left')
+    plt.show()
+
+def prior_gen(pcnn, batch=10):
+    """Creates and returns priors (code grids) sampled from the PixelCNN model."""
+
+    priors = np.zeros(shape=(batch,) + (pcnn.input_shape)[1:])
+    batch, rows, cols = priors.shape
+
+    # iterate over the priors - must be done one pixel at a time
+    for row in range(rows):
+        for col in range(cols):
+            logits = pcnn.predict(priors)
+            sampler = tfp.distributions.Categorical(logits)
+            samples = sampler.sample()
+            priors[:, row, col] = samples[:, row, col]
+
+    return priors
+
+def show_novel_imgs(priors, vqvae, quantizer, encoded_outputs):
+    # map the sampled codes to their codebook embeddings, decode them, and
+    # show each code next to the image generated from it
+    ohe = tf.one_hot(priors.astype("int32"), quantizer.embed_n)
+    flat_ohe = tf.reshape(ohe, (-1, quantizer.embed_n))
+    quantized = tf.matmul(flat_ohe, quantizer.embeds, transpose_b=True)
+    quantized = tf.reshape(quantized, (-1, *(encoded_outputs.shape[1:])))
+    generated = vqvae.get_layer("decoder").predict(quantized)
+
+    # visualise images
+    for i in range(len(priors)):
+        plt.subplot(1, 2, 1)
+        plt.imshow(priors[i], cmap=plt.cm.gray)
+        plt.title("Code")
+        plt.axis("off")
+
+        plt.subplot(1, 2, 2)
+        plt.imshow(generated[i].squeeze() + 0.5, cmap=plt.cm.gray)
+        plt.title("Generated")
+        plt.axis("off")
+        plt.show()
+
+
+def main():
+    train_images, test_images, train_data, test_data, variance = load_dataset()
+    vqvae = keras.models.load_model("vqvae.h5", custom_objects={"VQ": VQ})
+
+    # only the first half of the test set is encoded below (to save VRAM), so
+    # sample the display images from that half so they match their codes
+    image_inds = r.choice(len(test_data) // 2, NUM_IMGS)
+    images = test_data[image_inds]
+
+    n = NUM_IMGS
+    total_ssim = reconstruct_images(n, test_data, vqvae)
+    avg_ssim = total_ssim / n
+    print(f"Average SSIM over {n} reconstructed test images: {float(avg_ssim):.4f}")
+
+    quantizer = vqvae.get_layer("vector_quantizer")
+
+    encoded_outputs = vqvae.get_layer("encoder").predict(test_data)
+    # reduce indices because my VRAM is insufficient
+    encoded_outputs = encoded_outputs[:len(encoded_outputs) // 2]
+    flat_enc_outputs = encoded_outputs.reshape(-1, encoded_outputs.shape[-1])
+
+    codebook_indices = quantizer.get_code_indices(flat_enc_outputs)
+    codebook_indices = codebook_indices.numpy().reshape(encoded_outputs.shape[:-1])
+
+    visualise_codes(images, codebook_indices[image_inds])
+
+    pcnn = keras.models.load_model("pcnn.h5", custom_objects={"PixelConvLayer": PixelConvLayer, "ResBlock": ResBlock})
+
+    # generate the priors
+    priors = prior_gen(pcnn, batch=5)
+
+    show_novel_imgs(priors, vqvae, quantizer, encoded_outputs)
+
+if __name__ == "__main__":
+    main()
diff --git a/recognition/s4641500/train.py b/recognition/s4641500/train.py
new file mode 100644
index 0000000000..cb0a439439
--- /dev/null
+++ b/recognition/s4641500/train.py
@@ -0,0 +1,27 @@
+from dataset import *
+from modules import *
+
+EPOCHS = 50
+BATCH_SIZE = 128
+
+# train the VQ-VAE on the scaled training images
+train_images, test_images, train_data, test_data, data_variance = load_dataset()
+vqvae_trainer = Train_VQVAE(data_variance, dim=16, embed_n=128)
+vqvae_trainer.compile(optimizer=keras.optimizers.Adam())
+vqvae_hist = vqvae_trainer.fit(train_data, epochs=EPOCHS, batch_size=BATCH_SIZE)
+
+# map the test images to codebook indices and train the PixelCNN prior on them
+encoded_out = vqvae_trainer.vqvae.get_layer("encoder").predict(test_data)
+qtiser = vqvae_trainer.vqvae.get_layer("vector_quantizer")
+flat_encs = encoded_out.reshape(-1, encoded_out.shape[-1])
+codebooks = qtiser.get_code_indices(flat_encs)
+codebooks = codebooks.numpy().reshape(encoded_out.shape[:-1])
+pixel_cnn = get_pcnn(vqvae_trainer, encoded_out)
+pixel_cnn.compile(optimizer=keras.optimizers.Adam(3e-4),
+                  loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+                  metrics=["accuracy"],)
+pcnn_training = pixel_cnn.fit(x=codebooks, y=codebooks, batch_size=BATCH_SIZE, epochs=EPOCHS, validation_split=0.1)
+
+# save both models for predict.py
+vqvae = vqvae_trainer.vqvae
+vqvae.save("vqvae.h5")
+pixel_cnn.save("pcnn.h5")
\ No newline at end of file
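Both `fit()` calls in `train.py` return Keras `History` objects (`vqvae_hist` and `pcnn_training`); `predict.py`'s `plot_pcnn_loss`/`plot_pcnn_acc` expect such a `History` for the PixelCNN, but the VQ-VAE curves are currently not visualised anywhere. As an illustrative sketch only (not part of the submitted files), the metrics tracked by `Train_VQVAE.train_step` could be saved along these lines, with `vqvae_loss.png` as an example output path:

```python
# Sketch: plot the metrics reported by Train_VQVAE.train_step()
# ("loss", "reconstruction_loss", "vqvae_loss") from the History object
# returned by vqvae_trainer.fit() in train.py above.
import matplotlib.pyplot as plt

for key in ("loss", "reconstruction_loss", "vqvae_loss"):
    plt.plot(vqvae_hist.history[key], label=key)
plt.title("VQ-VAE training curves")
plt.xlabel("epoch")
plt.ylabel("loss")
plt.legend()
plt.savefig("vqvae_loss.png")  # example output path
```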