# Source code for transformation
# Author: Arrykrishna Mootoovaloo
# Collaborators: Prof. Alan Heavens, Prof. Andrew Jaffe, Dr. Florent Leclercq
# Email : arrykrish@gmail.com/a.mootoovaloo17@imperial.ac.uk
# Affiliation : Imperial Centre for Inference and Cosmology
# Status : Under Development
'''
Functions to transform the inputs and outputs
'''
import numpy as np
class transformation:
    '''
    Module to perform all relevant transformation, for example, pre-whitening the inputs and
    logarithm (supports log10 transformation) for the outputs.

    Typical usage is to call :meth:`x_transform` and :meth:`y_transform` once on the
    training set; these store the whitening matrix (``mu_matrix``) and the output
    shift (``y_min``) that the ``*_test`` methods then reuse.
    '''

    def __init__(self, theta: np.ndarray, y: np.ndarray):
        '''
        Store the training inputs and outputs.

        :param: theta (np.ndarray) : matrix of size N x d

        :param: y (np.ndarray) : a vector of the output (any shape with N elements)

        :param: N is the number of training points

        :param: d is the dimensionality of the problem

        :raises: AssertionError : if N is not strictly greater than d
        '''
        # input
        self.theta = theta

        msg = 'The number of training points is smaller than the dimension of the problem. Reshape your array!'

        # Raise explicitly instead of using ``assert`` so that the check is not
        # stripped when Python runs with -O; the exception type is unchanged.
        if self.theta.shape[0] <= self.theta.shape[1]:
            raise AssertionError(msg)

        # dimension of the problem
        self.d = self.theta.shape[1]

        # number of training points
        self.N = self.theta.shape[0]

        # y is stored as a column vector of size N x 1
        self.y = y.reshape(self.N, 1)

    def x_transform(self) -> np.ndarray:
        '''
        Transform the inputs (pre-whitening step).

        The transformation matrix is built from the SVD of the input covariance
        and stored in ``self.mu_matrix``. The inputs are rotated and rescaled
        only; no mean is subtracted.

        :return: theta_trans (np.ndarray) : transformed input parameters, of size N x d
        '''
        # np.cov squeezes its result, so a single input dimension (d = 1) gives
        # a 0-d array that np.linalg.svd rejects; force a d x d matrix.
        cov = np.atleast_2d(np.cov(self.theta.T))

        # Singular Value Decomposition; only the singular values and the
        # right singular vectors are needed.
        _, sing_vals, v_h = np.linalg.svd(cov)

        # see PICO paper for this step
        m_diag = np.diag(1.0 / np.sqrt(sing_vals))

        # the transformation matrix
        self.mu_matrix = np.dot(m_diag, v_h)

        # calculate and store the transformed input parameters
        theta_trans = np.dot(self.mu_matrix, self.theta.T).T
        self.theta_trans = theta_trans

        return theta_trans

    def x_transform_test(self, xtest: np.ndarray) -> np.ndarray:
        '''
        Given a test point, we transform the test point in the appropriate basis.

        Requires :meth:`x_transform` to have been called first, since it uses
        the stored ``self.mu_matrix``.

        :param: xtest (np.ndarray) : a vector of dimension d for the test point

        :return: x_trans (np.ndarray) : the transformed input parameters, of size 1 x d
        '''
        # flatten the input to a vector of length d
        xtest = xtest.reshape(self.d,)

        # transform the input using the transformation matrix
        x_trans = np.dot(self.mu_matrix, xtest).reshape(1, self.d)

        return x_trans

    def y_transform(self) -> np.ndarray:
        '''
        Transform the output (depends on whether we want this criterion).

        If all the outputs are positive, then y_min = 0, otherwise the minimum
        is computed and the outputs are shifted by twice this amount before the
        logarithm transformation is applied. The shift is stored in
        ``self.y_min`` and the result in ``self.y_trans``.

        Note: if the minimum output is exactly 0 the shift is zero, so those
        entries are mapped to ``-inf`` by the logarithm.

        :return: y_trans (np.ndarray) : array for the transformed output, of size N x 1
        '''
        if (self.y > 0).all():
            # no shift is needed when every output is strictly positive
            self.y_min = 0.0
            y_trans = np.log10(self.y)
        else:
            # shift by twice the minimum so that a negative minimum ends up
            # strictly positive before taking the logarithm
            self.y_min = np.amin(self.y)
            y_trans = np.log10(self.y - 2 * self.y_min)

        # store the transformed output
        self.y_trans = y_trans

        return y_trans

    def y_transform_test(self, y_original: np.ndarray) -> np.ndarray:
        '''
        Given a response/output which is not in the training set, this
        function will do the forward log_10 transformation.

        Requires :meth:`y_transform` to have been called first, since it uses
        the stored ``self.y_min``.

        :param: y_original (float or np.ndarray) : original output

        :return: y_trans_test (array) : transformed output
        '''
        y_trans_test = np.log10(y_original - 2 * self.y_min)

        return y_trans_test

    def y_inv_transform_test(self, y_test: np.ndarray) -> np.ndarray:
        '''
        Given a response (a prediction), this function will do
        the inverse transformation (from log_10 to the original function).

        Requires :meth:`y_transform` to have been called first, since it uses
        the stored ``self.y_min``.

        :param: y_test (float or np.ndarray) : a test (transformed) response (output)

        :return: y_inv (np.ndarray) : original (predicted) output
        '''
        y_inv = np.power(10, y_test) + 2.0 * self.y_min

        return y_inv