# Author: Arrykrishna Mootoovaloo
# Collaborators: Prof. Alan Heavens, Prof. Andrew Jaffe, Dr. Florent Leclercq
# Email : arrykrish@gmail.com/a.mootoovaloo17@imperial.ac.uk
# Affiliation : Imperial Centre for Inference and Cosmology
# Status : Under Development
'''
Routine to train all GPs in parallel to emulate the 3D Power Spectrum
'''
import numpy as np
import multiprocessing as mp
# our scripts
import utils.common as uc
import utils.helpers as hp
import ml.optimisation as op
import settings as st
np.set_printoptions(precision=4, suppress=True)
logger = uc.get_logger('training', 'training', 'logs')
[docs]def train(cosmologies: np.ndarray, target: np.ndarray, folder_name: str, fname: str, kwargs: dict) -> None:
'''
Function to train GPs
:param: cosmologies (np.ndarray) : array of size N_train x N_dim for the inputs to the GP
:param: target (np.ndarray) : an array for the targets (function)
:param: folder_name (str) : name of the folder where the outputs are stored
:param: fname (str) : name of the GP output
:param: kwargs (dict) : a dictionary with the settings for the GPs, for example, lambda_cap = 1000
'''
# Sometimes the GP might break due to numerical instrability
# hence, try except
try:
# train the GP
gp_model = op.maximise(cosmologies, target, **kwargs)
# store the GP in the specific folder
hp.store_pkl_file(gp_model, folder_name, fname)
# add some important information to the log file
info1 = folder_name + '/' + fname
evidence = np.around(gp_model.min_chi_sqr, 4)
logger.info(info1)
logger.info(evidence)
except BaseException:
pass
[docs]def worker(args: list) -> None:
'''
The argument here is simply the name of the output vector
:param: args (list) : list containing the arguments to be fed for training GPs
'''
train(*args)
[docs]def parallel_training(arguments: list) -> None:
'''
Call the parallel processing routine here
:param: arguments (list) - list of arguments (inputs) to train the GPs
:return: None
'''
# count number of CPU
ncpu = mp.cpu_count()
# run procedure in parallel
pool = mp.Pool(processes=ncpu)
pool.map(worker, arguments)
pool.close()
[docs]def main(directory: str = 'semigps') -> None:
'''
Main function to train all the Gaussian Process models.
:param: directory (str) - directory where the GPs are stored.
:return: None
'''
# if we are using the 3 components and excluding neutrino
if st.components and not st.neutrino:
# the inputs are fixed
cosmologies = hp.load_arrays('trainingset/components', 'cosmologies')
# load the growth factor
growth_factor = hp.load_arrays('trainingset/components', 'growth_factor')
# load the q functions
q_function = hp.load_arrays('trainingset/components', 'q_function')
# load the linear matter power spectrum
pk_linear = hp.load_arrays('trainingset/components', 'pk_linear')
# number of GPs for each component
n_gf = growth_factor.shape[1]
n_qf = q_function.shape[1]
n_pl = pk_linear.shape[1]
# folders where we want to store the GPs
folder_gf = directory + '/pknl_components' + st.d_one_plus + '/gf'
folder_qf = directory + '/pknl_components' + st.d_one_plus + '/qf'
folder_pl = directory + '/pknl_components' + st.d_one_plus + '/pl'
# generate arguments to be fed to parallel routine
# arguments for the growth factor
arg_gf = [[cosmologies, growth_factor[:, i], folder_gf, 'gp_' + str(i), st.gf_args] for i in range(n_gf)]
# arguments for the q(k,z) function
if st.emu_one_plus_q:
arg_qf = [[cosmologies, 1.0 + q_function[:, i], folder_qf, 'gp_' + str(i), st.qf_args] for i in range(n_qf)]
else:
arg_qf = [[cosmologies, q_function[:, i], folder_qf, 'gp_' + str(i), st.qf_args] for i in range(n_qf)]
# arguments for linear matter power spectrum
arg_pl = [[cosmologies, pk_linear[:, i], folder_pl, 'gp_' + str(i), st.pl_args] for i in range(n_pl)]
# idea: should we emulate 1 + q(k,z) rather than q(k,z) to prevent Pk to be negative?
# idea: should we use log-transformation for A(z) and q(k,z) or 1 + q(k,z)?
# solution: option for the user to try these possibilities in any case;
# perform training in parallel
parallel_training(arg_gf)
parallel_training(arg_qf)
parallel_training(arg_pl)
# if we are using the 3 components and excluding neutrino
if st.components and st.neutrino:
# the inputs are fixed
cosmologies = hp.load_arrays('trainingset/components', 'cosmologies_neutrino')
# load the growth factor
growth_factor = hp.load_arrays('trainingset/components_neutrino', 'growth_factor')
# load the q functions
q_function = hp.load_arrays('trainingset/components_neutrino', 'q_function')
# load the linear matter power spectrum
pk_linear = hp.load_arrays('trainingset/components_neutrino', 'pk_linear')
# number of GPs for each component
n_gf = growth_factor.shape[1]
n_qf = q_function.shape[1]
n_pl = pk_linear.shape[1]
# folders where we want to store the GPs
folder_gf = directory + '/pknl_neutrino_components' + st.d_one_plus + '/gf'
folder_qf = directory + '/pknl_neutrino_components' + st.d_one_plus + '/qf'
folder_pl = directory + '/pknl_neutrino_components' + st.d_one_plus + '/pl'
# generate arguments to be fed to parallel routine
# arguments for the growth factor
arg_gf = [[cosmologies, growth_factor[:, i], folder_gf, 'gp_' + str(i), st.gf_args] for i in range(n_gf)]
# arguments for the q(k,z) function
if st.emu_one_plus_q:
arg_qf = [[cosmologies, 1.0 + q_function[:, i], folder_qf, 'gp_' + str(i), st.qf_args] for i in range(n_qf)]
else:
arg_qf = [[cosmologies, q_function[:, i], folder_qf, 'gp_' + str(i), st.qf_args] for i in range(n_qf)]
# arguments for linear matter power spectrum
arg_pl = [[cosmologies, pk_linear[:, i], folder_pl, 'gp_' + str(i), st.pl_args] for i in range(n_pl)]
# perform training in parallel
parallel_training(arg_gf)
parallel_training(arg_qf)
parallel_training(arg_pl)
if not st.components and not st.neutrino:
# the inputs are fixed
cosmologies = hp.load_arrays('trainingset/pk_nonlinear', 'cosmologies')
# the non linear matter power spectrum
pk_nl = hp.load_arrays('trainingset/pk_nonlinear', 'pk_nl')
# number of GPs
npk = pk_nl.shape[1]
# folder where we want to store the GPs
folder_pknl = directory + '/pknl' + st.d_one_plus + '/'
args = [[cosmologies, pk_nl[:, i], folder_pknl, 'gp_' + str(i), st.pknl_args] for i in range(npk)]
# perform training in parallel
parallel_training(args)
if not st.components and st.neutrino:
# the inputs are fixed
cosmologies = hp.load_arrays('trainingset/pk_nonlinear', 'cosmologies_netrino')
# the non linear matter power spectrum
pk_nl = hp.load_arrays('trainingset/pk_nonlinear', 'pk_nl_neutrino')
# number of GPs
npk = pk_nl.shape[1]
# folder where we want to store the GPs
folder_pknl = directory + '/pknl_neutrino' + st.d_one_plus + '/'
args = [[cosmologies, pk_nl[:, i], folder_pknl, 'gp_' + str(i), st.pknl_args] for i in range(npk)]
# perform training in parallel
parallel_training(args)
# Standard boilerplate to call the main() function to begin the program.
if __name__ == '__main__':
main()