# This script fits the emulator to a single PFT and then saves the results.
# Note that this has to be redone 5 times (once per PFT), changing the value below each time.
PFT_to_emul = "BT"

# NOTE: you may get slightly different results between runs, because the optimiser can find different hyperparameter values.

import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
import os


# Import the training data (adjust the location to wherever you put the file).
# os.path.join keeps the path valid on non-Windows systems; on Windows it
# produces the same "Data\Training_Data_standardised.csv" path as before.
Emul_Data = pd.read_csv(os.path.join("Data", "Training_Data_standardised.csv"))

# Grab the output (gpp for the chosen PFT) as a float64 column vector.
y = np.array(Emul_Data["gpp_" + PFT_to_emul]).reshape(-1, 1).astype(np.float64)


# Drop the calibration parameters that aren't relevant to this PFT.

# Every PFT except the one being emulated (exact name comparison).
pft_names = ["BT", "NT", "C3g", "SH", "Cr"]
pft_names = [pft for pft in pft_names if pft != PFT_to_emul]

# A column is treated as belonging to another PFT when its name contains that
# PFT's name. Looping over pft_names avoids hard-coding how many are left.
other_pft_columns = [
    column for column in Emul_Data.columns
    if any(pft in column for pft in pft_names)
]

# Drop the columns of the other PFTs...
Emul_Data = Emul_Data.drop(other_pft_columns, axis=1)
# ...and the remaining columns that are not fed to the emulator.
Emul_Data = Emul_Data.drop(
    ["index", "time", "year", "lon", "lat", "X", "Y", "gpp_gb",
     PFT_to_emul, "gpp_" + PFT_to_emul],
    axis=1,
)


# Grab the input matrix (one row per sample, one column per remaining variable).
X = np.array(Emul_Data)

# Keep the column names so the input columns can still be identified later.
variable_names = Emul_Data.columns


# Remove the DataFrame now that X and y have been extracted (saves memory).
del Emul_Data

# Establish some attributes.
N = X.shape[0]  # how many samples are there
dim = X.shape[1]  # how many input columns (dimensions) are there


# Indices of all input columns; every variable is active in the kernel.
active_vars = list(range(dim))

# Start a timer for the fit (not essential).
startTime = datetime.now()


# Emulator

import gpflow
import tensorflow as tf

# Build everything inside gpflow.defer_build() and compile explicitly afterwards.
with gpflow.defer_build():
    # Covariance function: RBF over all input columns, with ARD switched on.
    k = gpflow.kernels.RBF(input_dim=dim, active_dims=active_vars, ARD=True)

    # SVGP: number of inducing locations, capped at the number of samples.
    M = min(10 * dim, len(X))
    # Initialise the inducing locations to M distinct, randomly chosen rows of X
    # (copied so that Z does not share memory with X).
    inducing_rows = np.random.choice(range(0, len(X)), M, replace=False)
    Z = X[inducing_rows].copy()
    m = gpflow.models.SVGP(
        X=X,
        Y=y,
        Z=Z,
        kern=k,
        mean_function=None,
        likelihood=gpflow.likelihoods.Gaussian(),
        minibatch_size=1000,
    )

m.compile()


#now lets optimise
class Logger(gpflow.actions.Action):
    """Action that records the model's negated likelihood tensor while optimising.

    A value is appended to ``logf`` whenever the iteration number is a
    multiple of 10.
    """

    def __init__(self, model):
        """Remember the model to evaluate and start with an empty log."""
        self.logf = []
        self.model = model

    def run(self, ctx):
        """Evaluate the likelihood tensor in ``ctx.session`` and log its negation."""
        # Only every 10th iteration is recorded.
        if ctx.iteration % 10 != 0:
            return
        # Extract the likelihood tensor's current value from the TensorFlow session.
        value = ctx.session.run(self.model.likelihood_tensor)
        # Store it with the sign flipped.
        self.logf.append(-value)


# NOTE: the flag is set to True, so the inducing point locations stay trainable.
# Change this to False to turn off training for the inducing point locations.
m.feature.trainable = True



def run_adam(model, iterations):
    """
    Utility function running the Adam Optimiser interleaved with a `Logger` action.

    :param model: GPflow model
    :param iterations: number of iterations
    :return: the `Logger` instance; its `logf` list holds the values recorded during the run
    """
    # Create an Adam Optimiser action
    adam = gpflow.train.AdamOptimizer().make_optimize_action(model)
    # Create a Logger action
    logger = Logger(model)
    # Create the optimisation loop that interleaves Adam with Logger and run it
    # straight away by calling it; its return value is not needed.
    gpflow.actions.Loop([adam, logger], stop=iterations)()
    # Bind current TF session to model
    model.anchor(model.enquire_session())
    return logger
    


startTime = datetime.now()

# Run the optimiser. This can take a while (about 20 minutes for the original
# author); the 1000 iterations requested here may be more than is needed.
logger = run_adam(m, gpflow.test_util.notebook_niter(1000))

# This is the result of the optimiser - it should have levelled off and hit equilibrium.
# Logger stores the negated likelihood, so negate it again to plot the ELBO.
elbo_trace = -np.array(logger.logf)

# Logger records one value every 10 iterations, so scale the x positions to
# iteration numbers (assumes the loop counts from 0 - TODO confirm).
LOG_EVERY = 10
logged_iterations = LOG_EVERY * np.arange(len(elbo_trace))

fig = plt.figure()
# Pass 111 explicitly: matplotlib releases before 3.1 return None from a bare add_subplot().
ax1 = fig.add_subplot(111)
ax1.plot(logged_iterations, elbo_trace)
ax1.set_xlabel('iteration')
ax1.set_ylabel('ELBO')



# Report how long the optimisation and plotting took.
print(datetime.now() - startTime)




# Now save the results.
import pickle

# Make sure the output folder exists first, so that a missing directory cannot
# make the script fail after the long optimisation has already finished.
results_dir = 'place_to_save_results'
os.makedirs(results_dir, exist_ok=True)

# Pickle the values returned by read_trainables() into one file per PFT.
with open(os.path.join(results_dir, 'SVGP_' + PFT_to_emul + '.pkl'), 'wb') as fp:
    pickle.dump(m.read_trainables(), fp)




