// QUESTION 3
//
// This program models a 2-class problem using two 3-D Gaussian distributions
// with diagonal covariance matrices. It further improves the performance using
// the MCE discriminative training algorithm. Finally, the program evaluates
// the performance of the resulting model on the specified data sets.
//

#include <math.h>
#include <stdlib.h>
#include <stdio.h>

// 2-class data container
// A, B   : pointers to data
// NA, NB : data counts
// D      : data dimensionality
typedef struct
{
    double *A, *B;
    int NA, NB, D;
} Data;

// model container
// muA, muB, sigA, sigB : means, variances
// pA, pB               : priors
typedef struct
{
    double *muA, *muB, *sigA, *sigB;
    double pA, pB;
} Model;

// fills an array with the specified value
static inline void fill(double* arr, double val, int n)
{
    int i;
    for( i = 0; i < n; i++ )
        arr[i] = val;
}

// allocates an array and fills it with the specified value
static inline double* mallocfill(size_t n, size_t size, double val)
{
    double* arr = (double*)malloc(n*size);
    fill(arr, val, n);
    return arr;
}

// calculates the moment about the mean of the specified order;
// used to compute the mean and variance of the data
void moment(double* data, int D, int N, int power, double* mu, double* mom)
{
    int d, n;
    for( d = 0; d < D; d++ )
        for( n = 0; n < N; n++ )
            mom[d] += pow(data[d*N+n]-mu[d], power);
    for( d = 0; d < D; d++ )
        mom[d] /= N;
}

// computes the mean and diagonal covariance matrix of a Gaussian
void suffstat(double* data, int D, int N, double* mu, double* sig)
{
    double* zero = mallocfill(D, sizeof(double), 0);
    moment(data, D, N, 1, zero, mu);   // first moment about zero  = mean
    moment(data, D, N, 2, mu, sig);    // second moment about mean = variance
    free(zero);
}

// computes the logistic (sigmoid) function with slope a
static inline double logit(double a, double x)
{
    return 1/(1+exp(-a*x));
}

// evaluates a D-dimensional Gaussian density with a diagonal covariance
// matrix at the n-th sample of a column-major data array
static inline double indmvnpdf(double* data, int n, int D, int N, double* mu, double* sig)
{
    int d;
    double a = 0, b = 1;
    for( d = 0; d < D; d++ )
    {
        a += -pow(data[d*N+n]-mu[d], 2)/sig[d];
        b *= sig[d];
    }
    return exp(0.5*a)/sqrt(pow(6.283185307179586, D)*b);   // 6.2831... = 2*pi
}

// trains models for the two classes: maximum-likelihood initialization
// followed by MCE discriminative training
Model train(Data data)
{
    double a, b;            // class-conditional scores p(class)*p(x|class)
    double l;               // smoothed (sigmoid) misclassification loss
    double s = 43, e = 43;  // sigmoid slope and learning rate
    int i, n, d;
    int maxIter = 200;

    double* dmuA  = mallocfill(data.D, sizeof(double), 0);
    double* dmuB  = mallocfill(data.D, sizeof(double), 0);
    double* dsigA = mallocfill(data.D, sizeof(double), 0);
    double* dsigB = mallocfill(data.D, sizeof(double), 0);

    Model m;
    m.muA  = mallocfill(data.D, sizeof(double), 0);
    m.muB  = mallocfill(data.D, sizeof(double), 0);
    m.sigA = mallocfill(data.D, sizeof(double), 0);
    m.sigB = mallocfill(data.D, sizeof(double), 0);
    m.pA   = (double)data.NA/(data.NA+data.NB);
    m.pB   = (double)data.NB/(data.NA+data.NB);

    suffstat(data.A, data.D, data.NA, m.muA, m.sigA);
    suffstat(data.B, data.D, data.NB, m.muB, m.sigB);

    // MCE with GPD
    for( i = 0; i < maxIter; i++ )
    {
        fill(dmuA,  0, data.D);
        fill(dmuB,  0, data.D);
        fill(dsigA, 0, data.D);
        fill(dsigB, 0, data.D);

        // compute contributions to the gradient due to class A
        for( n = 0; n < data.NA; n++ )
        {
            // a and b are the (unnormalized) posterior scores of the current
            // datum under models A and B
            a = m.pA*indmvnpdf(data.A, n, data.D, data.NA, m.muA, m.sigA);
            b = m.pB*indmvnpdf(data.A, n, data.D, data.NA, m.muB, m.sigB);
            l = logit(s, -a+b);
            for( d = 0; d < data.D; d++ )
            {
                dmuA[d]  -= l*(1-l)*a*(data.A[data.NA*d+n]-m.muA[d]);
                dmuB[d]  += l*(1-l)*b*(data.A[data.NA*d+n]-m.muB[d]);
                dsigA[d] -= l*(1-l)*a*(1+pow(data.A[data.NA*d+n]-m.muA[d],2)/m.sigA[d]);
                dsigB[d] += l*(1-l)*b*(1+pow(data.A[data.NA*d+n]-m.muB[d],2)/m.sigB[d]);
            }
        }
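
        // Note on the gradient terms above (mirrored below for class B):
        // for a class-A sample the misclassification measure is
        //     d(x) = -a + b,   a = pA*N(x; muA, sigA),   b = pB*N(x; muB, sigB),
        // and the smoothed 0/1 loss is the sigmoid l = 1/(1 + exp(-s*d(x))),
        // whose derivative with respect to d(x) is s*l*(1-l). Each accumulator
        // therefore carries the common factor l*(1-l); the slope s is applied
        // once in the update step at the end of the iteration.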

        // compute contributions to the gradient due to class B
        for( n = 0; n < data.NB; n++ )
        {
            // a and b are the (unnormalized) posterior scores of the current
            // datum under models A and B
            a = m.pA*indmvnpdf(data.B, n, data.D, data.NB, m.muA, m.sigA);
            b = m.pB*indmvnpdf(data.B, n, data.D, data.NB, m.muB, m.sigB);
            l = logit(s, -b+a);
            for( d = 0; d < data.D; d++ )
            {
                dmuA[d]  += l*(1-l)*a*(data.B[data.NB*d+n]-m.muA[d]);
                dmuB[d]  -= l*(1-l)*b*(data.B[data.NB*d+n]-m.muB[d]);
                dsigA[d] += l*(1-l)*a*(1+pow(data.B[data.NB*d+n]-m.muA[d],2)/m.sigA[d]);
                dsigB[d] -= l*(1-l)*b*(1+pow(data.B[data.NB*d+n]-m.muB[d],2)/m.sigB[d]);
            }
        }

        // gradient-descent update of the model parameters
        for( d = 0; d < data.D; d++ )
        {
            m.muA[d]  -= e*s*dmuA[d]/m.sigA[d];
            m.muB[d]  -= e*s*dmuB[d]/m.sigB[d];
            m.sigA[d] -= 0.5*e*s*dsigA[d]/m.sigA[d];
            m.sigB[d] -= 0.5*e*s*dsigB[d]/m.sigB[d];
        }
    }

    free(dmuA);
    free(dmuB);
    free(dsigA);
    free(dsigB);

    return m;
}

// computes the classification error rate of a model on a data set
float test(Data d, Model m)
{
    int n;
    float err = 0;

    // evaluate model performance using the MAP decision rule
    for( n = 0; n < d.NA; n++ )
        err += m.pA*indmvnpdf(d.A, n, d.D, d.NA, m.muA, m.sigA) <=
               m.pB*indmvnpdf(d.A, n, d.D, d.NA, m.muB, m.sigB);
    for( n = 0; n < d.NB; n++ )
        err += m.pA*indmvnpdf(d.B, n, d.D, d.NB, m.muA, m.sigA) >=
               m.pB*indmvnpdf(d.B, n, d.D, d.NB, m.muB, m.sigB);

    return err/(d.NA+d.NB);
}

// reads data from a file and stores each class in column-major order
// [x0 ... xN y0 ... yN z0 ... zN]
int readdata(char* fname, Data* set)
{
    FILE* fid = fopen(fname, "r");
    int N = 10000;
    int n;
    int nA = 0, nB = 0;
    char* c;
    double *x, *y, *z;

    if( fid == NULL )
    {
        printf("Unable to read %s\n", fname);
        return -1;
    }

    c = (char*)malloc(sizeof(char)*N);
    x = (double*)malloc(sizeof(double)*N);
    y = (double*)malloc(sizeof(double)*N);
    z = (double*)malloc(sizeof(double)*N);

    set->NA = 0;
    set->NB = 0;
    set->D  = 3;

    // read records of the form "A: (x y z)", growing the buffers as needed
    for( n = 0; ; n++ )
    {
        if( n == N )
        {
            N *= 2;
            c = (char*)realloc(c, sizeof(char)*N);
            x = (double*)realloc(x, sizeof(double)*N);
            y = (double*)realloc(y, sizeof(double)*N);
            z = (double*)realloc(z, sizeof(double)*N);
        }
        if( fscanf(fid, " %c: (%lf %lf %lf)", &c[n], &x[n], &y[n], &z[n]) != 4 )
            break;
        if( c[n] == 'A' ) set->NA++; else set->NB++;
    }
    fclose(fid);
    N = n;

    set->A = (double*)malloc(sizeof(double)*set->NA*3);
    set->B = (double*)malloc(sizeof(double)*set->NB*3);
    for( n = 0; n < N; n++ )
    {
        if( c[n] == 'A' )
        {
            set->A[nA]           = x[n];
            set->A[set->NA*1+nA] = y[n];
            set->A[set->NA*2+nA] = z[n];
            nA++;
        }
        else
        {
            set->B[nB]           = x[n];
            set->B[set->NB*1+nB] = y[n];
            set->B[set->NB*2+nB] = z[n];
            nB++;
        }
    }

    free(c);
    free(x);
    free(y);
    free(z);
    return 0;
}

int main(int argc, char* argv[])
{
    Data trainData, testData;
    Model model;

    if( argc < 3 )
    {
        printf("***********************************************\n");
        printf("Question 3\n");
        printf("Error: Not enough arguments.\n");
        printf("Please specify training and testing datasets:\n");
        printf("q3 Train.dat Test.dat\n");
        printf("***********************************************\n");
        return 0;
    }

    // read data
    if( readdata(argv[1], &trainData) == -1 || readdata(argv[2], &testData) == -1 )
        return 0;

    // train model
    model = train(trainData);

    // test model
    printf("Train set: %2.2f%% correct\n", (1-test(trainData, model))*100);
    printf("Test set: %2.2f%% correct\n", (1-test(testData, model))*100);

    free(model.muA);
    free(model.muB);
    free(model.sigA);
    free(model.sigB);
    free(trainData.A);
    free(trainData.B);
    free(testData.A);
    free(testData.B);

    return 0;
}
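
// Example build-and-run commands (a minimal sketch, assuming GCC on a system
// where the math library must be linked explicitly and the source is saved as
// q3.c; the data file names are simply the ones from the usage message above):
//
//   gcc -O2 -o q3 q3.c -lm
//   ./q3 Train.dat Test.dat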