/*
   This is the configuration file for running Gaussian Process
   experiments. The comments explain what each component does.

   The file is included in main(), so everything has to be valid C -
   remember to end lines with ; etc. Do not change the types or names
   of the variables either.
*/

/******************************************************************
 Define what operations to do. Other parameters dealing with
 details of each of these are defined below.

 N.B. In the current version of satGP, meanfunction fitting,
 covariance fitting, and GP marginal computation need to be
 done sequentially: e.g. if you want to learn the mean function
 coefficients and then run the GP in a grid, you need to do
 that in two experiments: the learned parameters are not passed
 automatically to the GP computation routines. Instead, you
 need to carry out another experiment with the appropriate
 config file (this file, gpconfig.h).
********************************************************************/

/* Switch for fitting the beta coefficients of the type-3 mean
   function; any non-zero value turns the fit on. "Local" means that
   the resulting parameters end up differing from one grid point to
   the next. */
const int fit_local_betas = 0;

/* Selects whether and how the parameters of the chosen covariance
   kernels are fitted:

   0 - no covariance parameter fitting
   1 - synthetic study: synthetic data is generated according to the
       kernel values and the calibration is then run on it (the data
       generation details are set further down in this file)
   2 - real data case: calibration is run after real data has been
       added */
const int covariance_fitting_type = 0;

/* Which GP computation to carry out:

   0 - nothing
   1 - calculate the GP marginals (mean and uncertainties)
   2 - draw samples from the Gaussian Process */
const int simulation_type = 1;

/* Dependent variable of the experiment:

   0 - the regular OCO-2 xco2
   1 - u wind component
   2 - v wind component

   When adding your own dependent variables and editing the data
   reading routines, 0 is probably the value to use here. */
const int dependent_variable = 0;

/* ---- Parameters controlling data reading and grid specification for
	either GP or mean function parameter calibration. For
	covariance kernel training, some parameters like maxdays and
	area are overridden below. ---- */

/* Key of the area to look at. The available areas are defined at the
   top of gaussian_proc.h and can be modified there as needed. The
   plotting code lists the same areas in plotting/areas.py; for the
   plots to come out right, a given key must have the same definition
   in both places. */
char area[] = "World";

/* Length of the simulated period in calendar days. Data will not be
   available for every one of those days. When random data is
   generated, it falls within this span.

   With a daylist.txt file in use (use_daylist below is non-zero), the
   listed days must lie within the span defined here. Otherwise no
   observations inside the time span inform the GP and the result is
   just the prior. A sign of this is an uncertainty field that has
   the same value (the maximum covariance) everywhere. */
const int maxdays = 1600;

/* Non-zero: read the list of days to simulate from daylist.txt and
   simulate only the day numbers found there. Zero: simulate all
   days. The list is generated by create_daylist.py, which is run by
   gproc.sh. Currently this only works with simulation_type=1. */
int use_daylist = 1;

/* Directory holding the observation data. The data currently has to
   be in nc4 files in the OCO-2 format. Every file in the directory is
   read, so the directory must not contain anything else. */
char datadir[256] = "../../data/oco2_v9/";

/* Resolution of the grid, in degrees. This also matters for
   covariance fitting, because data thinning depends on gridres. */
const float gridres = 2.0;

/* Mean distance (in grid points at the equator) between the
   observations that are read in at random. Local beta fitting uses
   beta_fitting_obs_dist instead, but subsequent computations
   (e.g. gridGP()) go back to this constant. A tiny value includes
   everything, a huge value includes nothing. */
const float obs_dist = 1.0;

/* Approximate number of spatial grid points handled in one data
   sweep. This is the knob for memory constraints: with too large a
   domain the program runs out of memory. Parameter calibration does
   not adhere to this limit. */
uint gi_per_sweep = 5000000;

/* Number of corner-to-corner sweeps made when calculating the MRF
   that yields the local betas. The results are pretty much final
   after a single sweep, so 1 should be used here. */
uint opt_iters = 1;

/* ---- Parameters defining the mean function ---- */

/* The mean function is subtracted from the data before GP
   fitting. It is a function of time and of parameters that can
   possibly vary spatially; these parameters are called the "beta"
   parameters. global_mean_on_day() is currently the only mean
   function available, but adding others should not be too much
   work. Its form is written out further below.

   Available mean function types:

   0 - zero mean function: nothing is subtracted
   1 - globally same beta coefficients
   2 - an interpolated field of numbers: a file containing a space-
       and time-dependent field is read in and its numbers are
       subtracted if needed, interpolating if necessary
   3 - spatially varying beta coefficients */
int mftype = 3;

/* Mean function used when mftype is 1 or 3. Its first argument is a
   float (time) and it returns a float (e.g. the time-dependent
   variable like dv). The second argument is a float pointer -
   presumably the beta coefficients; confirm against
   mean_functions.h. The default, global_mean_on_day(), is defined in
   mean_functions.h, which gp.c #includes before this file via
   gaussian_proc.h. */
float (*mean_function)(float, float*) = global_mean_on_day;

/* How many coefficients the mean function takes. */
const int mfcoeff = 5;

/* Mean function coefficients for mftype == 1. The local beta field
   optimization also starts from these values. */
float mf_type1_coeffs[] = {
  -1.71768,
  -0.503734,
  395.7915,
  0.416504,
  0.90167
};

/* Source of the mean function values for mftype == 2 (results of a
   previous GP etc.). Reading the file in correctly requires the area
   specification and the grid resolution (in degrees) of the file. The
   dimensions are (time, lat, lon), i.e. the same format that gridGP
   uses when writing data out. */
char  mf_type2_area[]         = "World";
float mf_type2_file_gridres   = 5.0;
char  mf_type2_datafile[]     = "/foo/bar";

/* Source of the mean function coefficients for mftype == 3. Reading
   the file in correctly requires the area specification and the grid
   resolution (in degrees) of the file. */
char  mf_type3_area[]         = "World";
float mf_type3_file_gridres   = 2.0;
char  mf_type3_coeff_file[]   = "../../data/meanfunction_fields_susiluoto_et_al_2020/fitted_betas.txt";

/* ---- Meanfunction (type 3) calibration-related parameters ---- */

/* Limits of the mean function parameters: bl ("beta low") holds the
   lower and bh ("beta high") the upper limit of each beta. The
   non-multiplicative parameters (beta[4] below) are included as
   well. The limits are for global_mean_on_day(), which has the form

   beta[0]*sin(x + beta[4]) + beta[1]*cos(2*x + beta[4]) + beta[3]*x + beta[2]

   where x is time, with a period of one year corresponding to the
   2*pi period. */
double bl[] = {-5, -2, 380, 0.415, -M_PI};
double bh[] = { 5,  2, 410, 0.417,  M_PI};

/* Distance scale for the beta parameter calibration, given here as a
   multiple of gridres. The value 0.3 is the maximum and produces
   relatively smooth fields; smaller values give more local
   variation. */
float dscale = 0.3 * gridres;

/* Mean distance between consecutive read-in observations used for
   local beta fitting. Domains are often large, which brings memory
   constraints and a lot of redundancy, so only a fraction of the
   observations is used. */
float beta_fitting_obs_dist = 5.0;

/* ---- Covariance kernel parameter definitions ---- */

/* How many subkernels the multiscale kernel is built from. Setting
   nkernels=1 gives a non-multiscale kernel. */
const uint nkernels = 2;

/* Kernel components that make up the actual multiscale kernel, given
   as kernel type codes. The codes are described in the kernel type
   list below, and the parameters of every component chosen here need
   to be set appropriately there as well.

   N.B. The kernels should be listed with roughly increasing spatial
   order. */
int kernelcomponents[] = {4, 1};

/* Kernel types and parameters for the multiscale kernel. The kernel
   types are:

   1 - kexp:  squared exponential kernel
   2 - kstat: a 'static' kernel, with no time-dependence, should be used
	      alone. Possibly does not work at the moment.
   3 - kper:  periodic kernel (annual period)
   4 - kmat:  matern kernel, currently with smoothness parameter nu=5/2
   5 - kwind: experimental wind-informed kernel. Wind data (variables u,v)
	      must be available in input files for each measurement.

   The parameters are:
   _tau:  square root of maximum covariance parameter
   _llat: length scale in latitude direction (unit ball distances)
   _llon: length scale in longitude direction (unit ball distances)
   _lt:   length scale in time direction (seconds)
   _lper: periodic kernel peak width parameter
   _l:    wind-informed kernel sphere scale parameter that gets
	  elongated according to rho along the wind axis, and shrunk
	  along the axis orthogonal to wind.
   _rho:  wind-informed kernel skewing factor. Currently produces
	  nans with too large values

   The values given here are defaults; the kmat_* and kexp_* values
   are reassigned from x_opt further down in this file.
*/

/* Squared exponential kernel. Time scale: one week in seconds. */
float kexp_tau = 2.5;
float kexp_llat = 0.15;
float kexp_llon = 0.15;
float kexp_lt = 7*24*3600;

/* Matern kernel. Time scale: one week in seconds. */
float kmat_tau = .5;
float kmat_llat = 0.007;
float kmat_llon = 0.01;
float kmat_lt = 7*24*3600;

/* Periodic kernel. */
float kper_tau = 1.;
float kper_llat = 0.02;
float kper_llon = 0.02;
float kper_lper = 0.1;

/* Wind-informed kernel. Time scale: one week in seconds. */
float kwind_tau = 1.5;
float kwind_l = 0.05;
float kwind_lt = 7*24*3600;
float kwind_rho = 50.;

/* ---- Parameters affecting learning the covariance parameters ---- */

/* Upper limit for the size of a single covariance kernel
   component. The total size will be nkernels*max_component_ksize.
   Keep in mind that inverting the covariance is O(n^3). In practice
   a good/fast value is anything under 200/nkernels.
   NOTE(review): the value set below (512/nkernels) is above that
   guidance - confirm that this is intended. */
int max_component_ksize = 512 / nkernels;


/* Bounds of the kernel parameters used while learning the covariance
   kernel. Each parameter has a *_low and a *_high value; they are
   listed pairwise, one kernel at a time. */

/* Squared exponential kernel (kexp) */
float kexp_tau_low   = 0.5;
float kexp_tau_high  = 5.0;
float kexp_llat_low  = 0.01;
float kexp_llat_high = 0.5;
float kexp_llon_low  = 0.01;
float kexp_llon_high = 0.5;
float kexp_lt_low    = 7 * 24 * 3600;
float kexp_lt_high   = 7 * 8 * 24 * 3600;

/* Matern kernel (kmat) */
float kmat_tau_low   = 0.0;
float kmat_tau_high  = 1.5;
float kmat_llat_low  = 0.0;
float kmat_llat_high = 0.05;
float kmat_llon_low  = 0.0;
float kmat_llon_high = 0.05;
float kmat_lt_low    = 0;
float kmat_lt_high   = 7 * 24 * 3600;

/* Periodic kernel (kper) */
float kper_tau_low   = 0.0;
float kper_tau_high  = 3.0;
float kper_llat_low  = 0.0;
float kper_llat_high = 0.1;
float kper_llon_low  = 0.0;
float kper_llon_high = 0.1;
float kper_lper_low  = 0.0;
float kper_lper_high = 0.4;

/* Wind-informed kernel (kwind) */
float kwind_tau_low  = 0.0;
float kwind_tau_high = 5.0;
float kwind_l_low    = 0.0;
float kwind_l_high   = 0.5;
float kwind_lt_low   = 0;
float kwind_lt_high  = 8 * 7 * 24 * 3600;
float kwind_rho_low  = 0.0;
float kwind_rho_high = 100.0;

/* Kernel parameter values can be overridden here with a vector
   coming from e.g. an optimization run. */

/* Parameters learned in gmd-2019-156 describing satGP. Layout of the
   vector: elements 0-3 are the matern kernel's tau, llat, llon and
   lt, elements 4-7 are the same four parameters of the squared
   exponential kernel. */
float x_opt[] = {
  8.99421000e-01,   /* [0] kmat_tau  */
  5.13264000e-03,   /* [1] kmat_llat */
  3.62751000e-02,   /* [2] kmat_llon */
  7.33341000e+04,   /* [3] kmat_lt   */
  2.71597000e+00,   /* [4] kexp_tau  */
  4.18367000e-02,   /* [5] kexp_llat */
  3.97428000e-01,   /* [6] kexp_llon */
  1.45512000e+06    /* [7] kexp_lt   */
};

/* Matern kernel values taken from the vector */
kmat_tau  = x_opt[0];
kmat_llat = x_opt[1];
kmat_llon = x_opt[2];
kmat_lt   = x_opt[3];

/* Squared exponential kernel values taken from the vector */
kexp_tau  = x_opt[4];
kexp_llat = x_opt[5];
kexp_llon = x_opt[6];
kexp_lt   = x_opt[7];

/* Parameters learned for OCO-2 wind kernel are here:
kwind_tau = 2.074;
kwind_l = 0.038;
kwind_lt = 288893.;
kwind_rho = 56.674;
*/

/* Parameters used for getting the OCO-2 mean function coefficients
kexp_tau = 1.;
kexp_llat = 0.025;
kexp_llon = 0.04;
kexp_lt = 21*24*3600;

kmat_tau = .5;
kmat_llat = 0.007;
kmat_llon = 0.01;
kmat_lt = 7*24*3600;
*/

/* How many reference points are used; the maximum likelihood
   estimates are calculated in the neighborhoods of these points. A
   multiple of the number of available threads is a good choice. The
   number of threads used for the computation is set in the gproc.sh
   script. */
size_t nrefpoints = 12;

/* Non-zero read_synthetic_data_from_file: the synthetic data is read
   from the file named by syntheticdatafile instead of being
   regenerated; nsyntheticdata gives the number of data read. This
   makes it possible to repeat experiments with the same synthetic
   data. */
int  read_synthetic_data_from_file = 0;
char syntheticdatafile[]           = "../../foo/bar";

/* Number of synthetic data generated when
   covariance_fitting_type == 1. */
size_t nsyntheticdata = 100000;

/* Standard deviation of the random error that is added to the
   synthetic observations. */
float sigma_synthetic_obs = 0.01;

/* Key of the area used for training the covariance parameters. A
   smaller area can be chosen here for performance, especially when
   debugging. */
char area_train_cov[] = "EastAsia";

/* Number of days used for training the covariance parameters; set
   here to the same span as maxdays. */
int maxdays_train_cov = maxdays;

/* Chooses between MCMC and optimization. Zero: the optimization is
   run with NLOpt. Positive integer: adaptive Metropolis MCMC is run
   for mcmc_iters iterations. */
size_t mcmc_iters = 10000000;

/* Wind kernel specific stuff */

/* Non-zero: use gridded winds read from txt files. Zero: use
   nearest-neighbor winds. */
int use_gridded_winds = 0;

/* Files from which the gridded winds are read. At the moment they
   need to have the same resolution and area as the experiment;
   adding interpolation would be trivial though. */
char gridded_vwinds_file[] = "../../experiments/experiment_vwinds/gp_mean.txt";
char gridded_uwinds_file[] = "../../experiments/experiment_uwinds/gp_mean.txt";
