/*
  \file ex01.cp

  This is a file that demonstrates the use of a few tools in libgibbsit,
  stored in gibbsit. These tools work on binary time-series.

  We are given a time-series T, of length K. The 1s and 0s in it
  appear as a very correlated sequence. We are going to do this:

  1. We will fit first- and second-order Markov models and see which
     fits better. We'll find that second-order fits better than
     first-order.

     A. If we do find the first-order Markov model to fit better,
     estimate alpha and beta, the transition probabilities of a
     2-state Markov chain.

  2. We'll fit a first-order Markov and an independent draw model and
     see which fits better. first-order Markov will fit better than
     independent.

  3. We'll thin the chain by k+1 (initial k = 1, meaning no thinning)
     and repeat. At some point, (1) above will show first-order Markov
     winning out; later we'll see that in (2), the independent model
     wins out.

     Jaideep Ray, jairay@sandia.gov, 10/19/2012
*/

#include <iostream>
#include <fstream>
#include <vector>
#include <string>
#include <cassert>

extern "C" {
#include "mcconv.h"
}

int readFromFile(std::string fname, std::vector<int> &timeSeries) ;
int thinTimeSeries(std::vector<int> &timeSeries, int k, std::vector<int> &thinTimeSeries) ;

int main(int argc, char **argv)
{

  /* Required copyright banner */
  std::cout << std::endl
    << " ==================================================================== " << std::endl
    << " Copyright 2012 Sandia Corporation. Under the terms of Contract" << std::endl
    << " DE-AC04-94AL85000, there is a non-exclusive license for use of this" << std::endl
    << " work by or on behalf of the U.S. Government. Export of this program" << std::endl
    << " may require a license from the United States Government." << std::endl << std::endl
    << " ==================================================================== " << std::endl << std::endl;

  /*
    Load the binary time-series. The check is done explicitly rather
    than with assert(), because assert() compiles away under NDEBUG and
    the tests below would then run on an empty series.
  */
  std::vector<int> timeSeries ;
  int err = readFromFile(std::string("timeSeries.dat"), timeSeries) ;
  if (err != 0)
  {
    std::cerr << " main() Could not read timeSeries.dat; exiting" << std::endl ;
    return(1) ;
  }

  /*
    Vary the thinning factor k over 2, ..., kmax-1. kmax is capped at 10
    and at (series length)/100, so that the thinned series never
    becomes too small (roughly 100 samples or more).
    NOTE(review): the file header speaks of starting at k = 1 (no
    thinning); the loop as written starts at k = 2 - confirm which is
    intended.
  */
  const int kmax = MIN(10, static_cast<int>(timeSeries.size() / 100)) ;
  for (int k = 2; k < kmax ; k++)
  {
    std::vector<int> thinTS ; // This will contain the k-thinned time-series
    err = thinTimeSeries(timeSeries, k, thinTS) ; // thin it
    if (err != 0)
    {
      std::cerr << " main() Thinning failed for k = " << k << "; exiting" << std::endl ;
      return(1) ;
    }

    /*
      Do the test for 1st-order vs 2nd-order Markov property. Get back
      G2 statistic ratio of likelihoods of 1st-order to 2nd-order
      Markov models and the \Delta BIC for the same
    */
    double g2A, bicA ;
    mctest(thinTS.data(), thinTS.size(), &g2A, &bicA) ;
    std::cout << "thin ratio = " << k << " Compare 1st- v/s 2nd-order Markov [G2, BIC] = ["
              << g2A << ", " << bicA << "] " << std::endl ;

    if (bicA < 0) // BIC_1st - BIC_2nd
    {
      /*
        The BIC difference favours the 1st-order model, so treat the
        thinned series as 1st-order Markov and estimate the transition
        probabilities alpha and beta of the 2-state chain.
      */
      double alpha, beta ;
      mcest(thinTS.data(), thinTS.size(), &alpha, &beta) ;
      std::cout << " ---> 1st-order Markov achieved! Transition probabilties [k, alpha, beta] = [" 
                << k << ", " << alpha << ", " << beta << "] " << std::endl ;
    }

    /*
      Do the test for independent versus 1st-order Markov test
    */
    double g2B, bicB ;
    indtest(thinTS.data(), thinTS.size(), &g2B, &bicB) ;
    std::cout << "thin ratio = " << k << " Compare independence v/s 1st-order Markov [G2, BIC] = ["
              << g2B << ", " << bicB << "] " << std::endl ;
    if (bicB < 0)
      std::cout << " =====> Independence achieved at k = " << k
                << std::endl ;

    // Blank line at end of iteration
    std::cout << std::endl ;
  }
  return(0) ;
}

/***********************************************************************************/

/**
   Function that reads in a time-series file.

   The file is read as an integer count N followed by N
   whitespace-separated integers.

   @param fname string, with the file name
   @param timeSeries, an STL int vector that is replaced with the
   values read from the file; it is left untouched on failure
   @return 0 if OK; -1 if the file cannot be opened, the length is
   missing or negative, or fewer than N values could be read
*/
int readFromFile(std::string fname, std::vector<int> &timeSeries)
{
  std::ifstream fin(fname.c_str()) ;
  if (!fin) 
  {
    std::cerr << " readFromFile() File " << fname << " not found; return with -1"
              << std::endl ;
    return(-1) ;
  }

  // A failed extraction or a negative count must not reach the vector
  // constructor: a negative int would be converted to a huge size.
  int length = 0 ;
  if (!(fin >> length) || length < 0)
  {
    std::cerr << " readFromFile() File " << fname
              << " has a missing or invalid length; return with -1" << std::endl ;
    return(-1) ;
  }
  std::cout << " readFromFile() Length of the time series is " << length << std::endl ;

  // Read into a scratch vector so the caller's vector is only modified
  // once the whole series has been read successfully.
  std::vector<int> values(static_cast<std::vector<int>::size_type>(length), 0) ;
  for (int i = 0; i < length; i++)
  {
    if (!(fin >> values[i]))
    {
      std::cerr << " readFromFile() File " << fname << " ended or was malformed after "
                << i << " of " << length << " values; return with -1" << std::endl ;
      return(-1) ;
    }
  }

  timeSeries.swap(values) ;
  return(0) ; // fin is closed by its destructor
}

/** 
    Function that thins a binary timeSeries by keeping every k-th
    sample, starting with the first one (indices 0, k, 2k, ...).

    @param timeSeries, STL vector<int> of 0s and 1s; not modified
    (unless it is the same object as thinTimeSeries)
    @param k, thinning factor; must be >= 1 (k = 1 copies the series)
    @param thinTimeSeries, the thinned version which will be filled up;
    its length is ceil(timeSeries.size() / k). Left untouched on failure.
    @return 0 if OK, -1 if k is not positive
*/
int thinTimeSeries(std::vector<int> &timeSeries, int k, std::vector<int> &thinTimeSeries) 
{
  // k == 0 would divide by zero and never advance the loop; k < 0
  // would index backwards. Reject both instead.
  if (k <= 0)
  {
    std::cerr << " thinTimeSeries() Thinning factor must be >= 1, got " << k
              << "; return with -1" << std::endl ;
    return(-1) ;
  }

  typedef std::vector<int>::size_type size_type ;
  const size_type stride = static_cast<size_type>(k) ;
  const size_type n = timeSeries.size() ;

  // Build into a scratch vector so the call is still correct when the
  // caller passes the same vector as both input and output.
  std::vector<int> thinned ;
  thinned.reserve((n + stride - 1) / stride) ; // ceil(n / k)
  for (size_type i = 0; i < n; i += stride)
    thinned.push_back(timeSeries[i]) ;

  thinTimeSeries.swap(thinned) ;
  return(0) ;
}
