#include <gaussian_proc.h>
#include <calibrate.h>

/* Main driving routine of satGP.

   Stages, in order:
     1. Seed the PRNG and, if use_daylist is set, read the day list.
     2. Build the compound covariance function configuration from
        kernelcomponents[].
     3. Create the experiment config E and initialize its mean
        function struct (E.mfs).
     4. If fit_local_betas is set, fit the local beta parameters and
        write them to "fitted_betas.txt".
     5. If covariance_fitting_type is nonzero, calibrate the covariance
        parameters against synthetic (1) or real (2) data.
     6. Run the stage selected by simulation_type (0: nothing, 1: GP
        computation over the grid in batches, 2: not implemented).

   The settings used below (nkernels, area, gridres, mftype, ...) are
   not declared in this function; they presumably come from
   gpconfig.h, which is included inside the function body -- TODO
   confirm.

   Returns 0 on success. Calls exit(1) on an unknown kernel type,
   exit(3) on an invalid covariance_fitting_type and exit(4) when
   simulation_type is 2. */
int main() {

#include <gpconfig.h>

  /* initialize prng only once */
  srandom(12345);

  size_t i, j, k;

  struct config E;
  struct config E_betas;
  struct config E_for_MLE;
  struct config E_synthetic_data;
  struct state *S = NULL;
  struct state *S_for_MLE = NULL;
  struct covfunconfig *cfc_compound = NULL;

  float *mftype3data = NULL;
  float *mftype2data = NULL;

  /* Output array  */
  float *synthetic_data = NULL;

  /* Scratch variable, used to save and restore E.max_rad during beta
     fitting. */
  float tmp;

  int ndays_in_daylist = -1;
  int *daylist = (use_daylist) ? read_daylist("daylist.txt", &ndays_in_daylist) : NULL;


  /* gi0 and gi1 are used for decomposing domain into many batches, if
     gi_per_sweep below is less than number of grid points requested
     (via gridres and area specification); */
  size_t gi0 = 0, gi1 = 0;

  /* Build covariance config, and set parameter limits if needed */
  cfc_compound = initialize_covfunconfig(-nkernels, 0, 0, 0, 0, 0, 0, 0);

  for (i=0; i<nkernels; i++) {
    switch (kernelcomponents[i]) {
    case 1 :
      cfc_compound->kernels[i] = initialize_covfunconfig(1, max_component_ksize, kexp_tau, kexp_llat, kexp_llon, kexp_lt, -1, -1);
      if (covariance_fitting_type) {
        set_kernel_parameter_limits_high(cfc_compound->kernels[i], kexp_tau_high, kexp_llat_high, kexp_llon_high, kexp_lt_high, -1, -1);
        set_kernel_parameter_limits_low(cfc_compound->kernels[i], kexp_tau_low, kexp_llat_low, kexp_llon_low, kexp_lt_low, -1, -1);
      }
      break;
    case 3 :
      cfc_compound->kernels[i] = initialize_covfunconfig(3, max_component_ksize, kper_tau, kper_llat, kper_llon, -1, kper_lper, -1);
      if (covariance_fitting_type) {
        set_kernel_parameter_limits_high(cfc_compound->kernels[i], kper_tau_high, kper_llat_high, kper_llon_high, -1, kper_lper_high, -1);
        set_kernel_parameter_limits_low(cfc_compound->kernels[i], kper_tau_low, kper_llat_low, kper_llon_low, -1, kper_lper_low, -1);
      }
      break;
    case 4 :
      cfc_compound->kernels[i] = initialize_covfunconfig(4, max_component_ksize, kmat_tau, kmat_llat, kmat_llon, kmat_lt, -1, -1);
      if (covariance_fitting_type) {
        set_kernel_parameter_limits_high(cfc_compound->kernels[i], kmat_tau_high, kmat_llat_high, kmat_llon_high, kmat_lt_high, -1, -1);
        set_kernel_parameter_limits_low(cfc_compound->kernels[i], kmat_tau_low, kmat_llat_low, kmat_llon_low, kmat_lt_low, -1, -1);
      }
      break;
    case 5 :
      /* NOTE(review): unlike the other kernel types, the limits are
         set here regardless of covariance_fitting_type -- confirm
         that this is intended. */
      cfc_compound->kernels[i] = initialize_covfunconfig(5, max_component_ksize, kwind_tau, kwind_l, -1, kwind_lt, -1, kwind_rho);
      set_kernel_parameter_limits_high(cfc_compound->kernels[i], kwind_tau_high, kwind_l_high, -1, kwind_lt_high, -1, kwind_rho_high);
      set_kernel_parameter_limits_low(cfc_compound->kernels[i], kwind_tau_low, kwind_l_low, -1, kwind_lt_low, -1, kwind_rho_low);
      break;
    default :
      printf("Kernel type %d not implemented, exiting...", kernelcomponents[i]);
      exit(1);
    }
  }

  /* Initialize the config object for calculating either gridGP() or
     fitting local beta parameters. The config object contains
     information that does not change over the course of the
     experiment.

     FIXME Is this needed for parameter fitting only? should you just
     change the mode then? (old comment, leave as-is if there are no
     issues)
  */
  E = create_config(area, gridres, maxdays, gi_per_sweep, obs_dist, 0, cfc_compound, NULL, dependent_variable, daylist, ndays_in_daylist);

  /* Initialize the mfstruct object according to wanted scenario. If
     local beta parameters are calibrated, then various fields need to
     be allocated. */
  if (fit_local_betas) {
    initialize_mfstruct(E.mfs, 3, mean_function, 5, bl, bh, mf_type1_coeffs, NULL, NULL, &E);
    /* If arr_2d == NULL, allocate local betas in E.mfs and set them
       to starting value determined by mf_type1_coeffs. */
    if (!E.mfs->arr_2d) {
      E.mfs->arr_2d = malloc(E.mfs->ncoeff * E.ngp * sizeof(float));
      /* Coefficient j occupies one contiguous run of ngp entries.
         NOTE(review): the stride is refconfig->ngp while the
         allocation and the loop bound use E.ngp; these presumably
         agree because &E was passed to initialize_mfstruct() above
         -- TODO confirm. */
      for (j=0; j<E.mfs->ncoeff; j++) {
        for (i=0; i<E.ngp; i++) {
          E.mfs->arr_2d[j*E.mfs->refconfig->ngp + i] = mf_type1_coeffs[j];
        }
      }
    }
    if (!E.mfs->arr_2d_prec) { /* Allocate this, zero-filled */
      E.mfs->arr_2d_prec = malloc(E.mfs->ncoeff*E.mfs->ncoeff*E.ngp*sizeof(float));
      for (k=0; k<E.mfs->ncoeff*E.mfs->ncoeff*E.ngp; k++) {
        E.mfs->arr_2d_prec[k] = 0;
      }
    }
  } else { /* No local beta calibration done */
    /* First line for global beta factors. If you do something else
       than mftype 3, do it here.

       NOTE(review): there is no default case, so no
       initialize_mfstruct() call is made for any other mftype
       value. */
    switch (mftype) {
    case 0 :
      initialize_mfstruct(E.mfs, 0, NULL, -1, NULL, NULL, NULL, NULL, NULL,  &E);
      break;
    case 1 :
      initialize_mfstruct(E.mfs, 1, mean_function, mfcoeff, NULL, NULL, mf_type1_coeffs, NULL,  NULL, &E);
      break;
    case 2 :
      printf("Warning: implementation of type 2 mean function might not work!\n");
      E_betas = create_config(mf_type2_area, mf_type2_file_gridres, 0, 0, 0, 0, NULL, NULL, dependent_variable, daylist, ndays_in_daylist);
      mftype2data = read_2d_array_from_txt(mf_type2_datafile, E_betas.ngp*maxdays, mfcoeff);
      initialize_mfstruct(E.mfs, 2, NULL, -1, NULL, NULL, mftype2data, NULL, NULL, &E_betas);
      break;
    case 3 :
      E_betas = create_config(mf_type3_area, mf_type3_file_gridres, 0, 0, 0, 0, NULL, NULL, dependent_variable, NULL, -1);
      mftype3data = read_2d_array_from_txt(mf_type3_coeff_file, E_betas.ngp, mfcoeff);
      initialize_mfstruct(E.mfs, 3, mean_function, mfcoeff, NULL, NULL, NULL, mftype3data, NULL, &E_betas);
      break;
    }
  }

  if (fit_local_betas) {
    /* Initialize state object and fill S->x,y,z,dv etc. Note that
       all grid points are dealt with at the same time for
       fitting.  */
    S = initialize_state(S, &E, 0, E.ngp, 0);
    E.obs_dist = beta_fitting_obs_dist;

    /* For beta fitting, maximum radius is altered so that we would
       not waste memory. The rationale here is that any close-by
       observations will be included and that there probably are
       anyway enough so that the far-away ones do not
       matter. Currently set at 1.5 times grid resolution at
       equator. */
    tmp = E.max_rad;
    E.max_rad = 2*M_PI/360*gridres*1.5;
    add_all_datafiles_in_dir(S, &E, datadir, maxdays);

    /* Change E.max_rad back here */
    E.max_rad = tmp;
    /* Find local beta parameters from the data that was just read,
       for current batch of gi's only. Maybe only works if gi0 = 0
       and gi1 = E->ngp. */
    fit_all_beta_parameters(S, &E, 0, E.ngp, dscale, opt_iters);
    write_1d_array_to_txt("fitted_betas.txt", E.mfs->arr_2d, E.mfs->ncoeff, E.ngp, 1);
    /* Restore the observation distance that was overridden for the
       fit. */
    E.obs_dist = obs_dist;
    teardown_state(S, &E);
  }

  if (covariance_fitting_type) {
    /* Evaluate at e.g. 1000 points, initialize everything, use same
       covariance kernel form as for generation but parameters will be
       randomized. The E_for_MLE and S_for_MLE objects are used for
       MCMC/NLOpt calibration, but for creating synthetic data, the
       E_synthetic_data object will be used instead.  */

    float obsd = (covariance_fitting_type == 1) ? 1e10 : obs_dist;

    E_for_MLE = create_config(area_train_cov, 0, maxdays_train_cov, 1000000, obsd, nrefpoints, cfc_compound, NULL, dependent_variable, daylist, ndays_in_daylist);
    /* The mean function struct is shared with E, not copied. */
    E_for_MLE.mfs = E.mfs;
    S_for_MLE = initialize_state(S_for_MLE, &E_for_MLE, 0, E_for_MLE.ngp, 0);

    /* Add observation data - synthetic or real - for
       calibration. FIXME some functionization could be done... */
    if (covariance_fitting_type == 1) {
      if (read_synthetic_data_from_file) {
        synthetic_data = read_2d_array_from_txt(syntheticdatafile, 4, nsyntheticdata);
      } else {
        /* Create a configuration for generating data. gi_per_sweep is
           just some huge number that is bigger than the amount of data
           that we have. nsyntheticdata is the number of random locations
           where the data will be sampled. */
        E_synthetic_data = create_config(area_train_cov, 0, maxdays_train_cov, 10000000, 1e10, nsyntheticdata, cfc_compound, NULL, dependent_variable, daylist, ndays_in_daylist);
        /* The grid config for mean function is in E.mfs->refconfig */
        E_synthetic_data.mfs = E.mfs;

        /* Generate synthetic data */
        S = initialize_state(S, &E_synthetic_data, 0, E_synthetic_data.ngp, 0);
        synthetic_data = sample_from_GP(&E_synthetic_data, S);
        teardown_state(S, &E_synthetic_data);
      }

      /* Add data to S_for_MLE from synthetic_data, the generated data
         set. For this, the parameter values of the covariance
         function are set to maximum values. That way, whatever
         parameter values will be proposed by whatever algorithm, the
         appropriate data will be available for calculating GP
         marginals with predict(). */

      /* The compound config was created with -nkernels as its first
         argument; presumably covftype holds that negative value, so
         -6*covftype is a positive element count -- TODO confirm. */
      float *x_low = malloc(-6*E.cfc->covftype*sizeof(float));
      float *x_high = malloc(-6*E.cfc->covftype*sizeof(float));
      float *x0 = malloc(-6*E.cfc->covftype*sizeof(float));
      uint *argidx = malloc(-6*E.cfc->covftype*sizeof(uint));
      uint npar;

      get_cfc_idx_and_limits(cfc_compound, &npar, argidx, x_low, x_high, 0, x0);
      set_cfpars_based_on_argidx(x_high, argidx, npar, &E_for_MLE);

      float lat, lon, t, dv;
      float zf = 0; /* zf for zerofloat, to use this as phony wind input */

      /* For getting normal rv's */
      struct boxmullerstruct *BM = initialize_boxmullerstruct();

      /* Each synthetic data point is four consecutive floats: lat,
         lon, t and the dependent variable. Noise drawn with
         normal(0, sigma_synthetic_obs, BM) is added to the latter. */
      for (i=0; i<nsyntheticdata; i++) {
        lat = synthetic_data[4*i];
        lon = synthetic_data[4*i+1];
        t = synthetic_data[4*i+2];
        dv = synthetic_data[4*i+3] + normal(0, sigma_synthetic_obs, BM);
        add_datapoint_to_S_for_calibration(&E_for_MLE, S_for_MLE, &lat, &lon, &t, &zf, &zf, &dv, &sigma_synthetic_obs);
      }

      /* Return to original parameter values */
      set_cfpars_based_on_argidx(x0, argidx, npar, &E_for_MLE);
      free(x_low);
      free(x_high);
      free(x0);
      free(argidx);
    } else if (covariance_fitting_type == 2) {
      add_all_datafiles_in_dir(S_for_MLE, &E_for_MLE, datadir, maxdays_train_cov);
    } else {
      printf("Invalid covariance_fitting_type %d\n", covariance_fitting_type);
      exit(3);
    }

    find_GP_parameters(&E_for_MLE, S_for_MLE, mcmc_iters);

    /* E is deliberately NOT torn down here: the simulation stage
       below still reads it, and it is released once at the end of
       main().

       NOTE(review): E_for_MLE shares E.mfs and cfc_compound with E.
       Whether teardown_config() releases those shared objects is not
       visible here -- confirm that E remains usable after this
       call. */
    teardown_state(S_for_MLE, &E_for_MLE);
    teardown_config(&E_for_MLE);

    /* E_synthetic_data is only created when the synthetic data was
       generated rather than read from file, so only tear it down in
       that case. */
    if (covariance_fitting_type == 1 && !read_synthetic_data_from_file) {
      teardown_config(&E_synthetic_data);
    }
    // FIXME Copy still new params to E->cfc, since otherwise new
    // learned parameters are not reflected immediately and currently
    // another experiment needs to be carried out.;
  }

  switch(simulation_type) {
  case 0 :
    break;
  case 1 :
    /* Process the grid in batches of at most E.gi_inc grid points. */
    while (gi1 < E.ngp) {
      /* Set the batch limits in the state struct */
      gi0 = gi1;
      gi1 = (gi1 + E.gi_inc > E.ngp) ? E.ngp : gi1 + E.gi_inc;

      S = initialize_state(S, &E, gi0, gi1, 0);
      if ((use_gridded_winds) && (E.use_wind) && (!E.dependent_variable)){
        S->uwinds_gridded  = read_2d_array_from_txt(gridded_uwinds_file, E.ngp, E.maxdays);
        S->vwinds_gridded  = read_2d_array_from_txt(gridded_vwinds_file, E.ngp, E.maxdays);
      }
      add_all_datafiles_in_dir(S, &E, datadir, maxdays);

      /* Calculate the specified GP */
      if (!E.mode) {
        gridGP(S, E);
      } else {
        loss(S, &E);
      }

      /* Release the per-batch buffers.
         NOTE(review): when the gridded winds were not read above,
         these frees rely on initialize_state() having left the two
         pointers NULL -- TODO confirm. */
      free(S->uwinds_gridded);
      free(S->vwinds_gridded);
      teardown_state(S, &E);
    }
    break;
  case 2 :
    printf("NOT IMPLEMENTED HERE YET");
    exit(4);
  }

  /* Free the rest of the result arrays and the config. This is the
     single release point for E, after its last use by the
     calibration and simulation stages above. */
  teardown_config(&E);

  return 0;

}


/* Random notes / issues that could be fixed:

   Run a compound kernel MCMC - where one kernel is constructed as a
   sum of several kernels

   TODO: Give several compound kernels to the config, for consecutive
   evaluation. Other option is to just use the sequential script and
   construct two config objects.

   TODO: Above there is a FIXME stating that new learned parameters
   are not immediately used for GP prediction. Make it so that
   everything can be done in one experiment; currently a second
   experiment needs to be run to use the learned parameters.

   FIXME the ncoeff is not really needed in mfstruct. Remove if not
   needed for optimization of betas either.

   FIXME add check that if E->cfc->kernels[0]->cftype == 2, then it is
   the only kernel.

   FIXME reallocate_S always reallocates all variables, even if
   use_wind is not set. This is wasteful.

   FIXME teardown routine for arr_2d in mfs missing!

   FIXME we should somehow deal with time properly. S->totaldays,
   datadays, and E->datadays, first_day_unix_noon and first_day_noon
   should be somehow consolidated. There should be simulation day
   range and dataday range, both dealing with the daynumbers and not
   datadays.
*/
