// QUESTION 3
//
// This program models a 2-class problem using two 3-D Gaussian distributions
// with diagonal covariance matrices. It further improves the performance using
// the MCE discriminative training algorithm. Finally, the program evaluates
// the performance of the resulting model on the specified data sets.
//

#include <math.h>
#include <stdlib.h>
#include <stdio.h>

// 2-class data container
// A, B   : pointers to data
// NA, NB : data counts
// D      : data dimensionality
typedef struct
{
    double *A, *B;
    int NA, NB, D;
} Data;

// model container
// muA, muB, sigA, sigB : means, variances
// pA, pB               : priors
typedef struct
{
    double *muA, *muB, *sigA, *sigB;
    double pA, pB;
} Model;

// fills an array with the specified value
static inline void fill(double* arr, double val, int n)
{
    int i;
    for( i = 0; i < n; i++ )
        arr[i] = val;
}

// allocates an array and fills it with the specified value
static inline double* mallocfill(size_t n, size_t size, double val)
{
    double* arr = (double*)malloc(n*size);
    fill(arr, val, n);
    return arr;
}

// calculates the moment about the mean of the specified order;
// used to compute the mean and variance of the data
void moment(double* data, int D, int N, int power, double* mu, double* mom)
{
    int d, n;
    for( d = 0; d < D; d++ )
        for( n = 0; n < N; n++ )
            mom[d] += pow(data[d*N+n]-mu[d], power);
    for( d = 0; d < D; d++ )
        mom[d] /= N;
}

// computes the mean and diagonal covariance matrix of a Gaussian
void suffstat(double* data, int D, int N, double* mu, double* sig)
{
    double* zero = mallocfill(D, sizeof(double), 0);
    moment(data, D, N, 1, zero, mu);   // first moment about zero  = mean
    moment(data, D, N, 2, mu, sig);    // second moment about mean = variance
    free(zero);
}

// computes the logistic (sigmoid) function with slope a
static inline double logit(double a, double x)
{
    return 1/(1+exp(-a*x));
}

// evaluates a D-dimensional Gaussian density with a diagonal covariance
// matrix at the n-th sample of a column-major data array
static inline double indmvnpdf(double* data, int n, int D, int N, double* mu, double* sig)
{
    int d;
    double a = 0, b = 1;
    for( d = 0; d < D; d++ )
    {
        a += -pow(data[d*N+n]-mu[d], 2)/sig[d];
        b *= sig[d];
    }
    return exp(0.5*a)/sqrt(pow(6.283185307179586, D)*b);   // 6.2831... = 2*pi
}

// trains models for the two classes: maximum-likelihood initialization
// followed by MCE discriminative training
Model train(Data data)
{
    double a, b;            // class-conditional scores p(class)*p(x|class)
    double l;               // smoothed (sigmoid) misclassification loss
    double s = 43, e = 43;  // sigmoid slope and learning rate
    int i, n, d;
    int maxIter = 200;

    double* dmuA  = mallocfill(data.D, sizeof(double), 0);
    double* dmuB  = mallocfill(data.D, sizeof(double), 0);
    double* dsigA = mallocfill(data.D, sizeof(double), 0);
    double* dsigB = mallocfill(data.D, sizeof(double), 0);

    Model m;
    m.muA  = mallocfill(data.D, sizeof(double), 0);
    m.muB  = mallocfill(data.D, sizeof(double), 0);
    m.sigA = mallocfill(data.D, sizeof(double), 0);
    m.sigB = mallocfill(data.D, sizeof(double), 0);
    m.pA   = (double)data.NA/(data.NA+data.NB);
    m.pB   = (double)data.NB/(data.NA+data.NB);

    suffstat(data.A, data.D, data.NA, m.muA, m.sigA);
    suffstat(data.B, data.D, data.NB, m.muB, m.sigB);

    // MCE with GPD
    for( i = 0; i < maxIter; i++ )
    {
        fill(dmuA,  0, data.D);
        fill(dmuB,  0, data.D);
        fill(dsigA, 0, data.D);
        fill(dsigB, 0, data.D);

        // compute contributions to the gradient due to class A
        for( n = 0; n < data.NA; n++ )
        {
            // a and b are the (unnormalized) posterior scores of the current
            // datum under models A and B
            a = m.pA*indmvnpdf(data.A, n, data.D, data.NA, m.muA, m.sigA);
            b = m.pB*indmvnpdf(data.A, n, data.D, data.NA, m.muB, m.sigB);
            l = logit(s, -a+b);
            for( d = 0; d < data.D; d++ )
            {
                dmuA[d]  -= l*(1-l)*a*(data.A[data.NA*d+n]-m.muA[d]);
                dmuB[d]  += l*(1-l)*b*(data.A[data.NA*d+n]-m.muB[d]);
                dsigA[d] -= l*(1-l)*a*(1+pow(data.A[data.NA*d+n]-m.muA[d],2)/m.sigA[d]);
                dsigB[d] += l*(1-l)*b*(1+pow(data.A[data.NA*d+n]-m.muB[d],2)/m.sigB[d]);
            }
        }
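
        // Note on the gradient terms above (mirrored below for class B):
        // for a class-A sample the misclassification measure is
        //     d(x) = -a + b,   a = pA*N(x; muA, sigA),   b = pB*N(x; muB, sigB),
        // and the smoothed 0/1 loss is the sigmoid l = 1/(1 + exp(-s*d(x))),
        // whose derivative with respect to d(x) is s*l*(1-l). Each accumulator
        // therefore carries the common factor l*(1-l); the slope s is applied
        // once in the update step at the end of the iteration.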

        // compute contributions to the gradient due to class B
        for( n = 0; n < data.NB; n++ )
        {
            // a and b are the (unnormalized) posterior scores of the current
            // datum under models A and B
            a = m.pA*indmvnpdf(data.B, n, data.D, data.NB, m.muA, m.sigA);
            b = m.pB*indmvnpdf(data.B, n, data.D, data.NB, m.muB, m.sigB);
            l = logit(s, -b+a);
            for( d = 0; d < data.D; d++ )
            {
                dmuA[d]  += l*(1-l)*a*(data.B[data.NB*d+n]-m.muA[d]);
                dmuB[d]  -= l*(1-l)*b*(data.B[data.NB*d+n]-m.muB[d]);
                dsigA[d] += l*(1-l)*a*(1+pow(data.B[data.NB*d+n]-m.muA[d],2)/m.sigA[d]);
                dsigB[d] -= l*(1-l)*b*(1+pow(data.B[data.NB*d+n]-m.muB[d],2)/m.sigB[d]);
            }
        }

        // gradient-descent update of the model parameters
        for( d = 0; d < data.D; d++ )
        {
            m.muA[d]  -= e*s*dmuA[d]/m.sigA[d];
            m.muB[d]  -= e*s*dmuB[d]/m.sigB[d];
            m.sigA[d] -= 0.5*e*s*dsigA[d]/m.sigA[d];
            m.sigB[d] -= 0.5*e*s*dsigB[d]/m.sigB[d];
        }
    }

    free(dmuA);
    free(dmuB);
    free(dsigA);
    free(dsigB);

    return m;
}

// computes the classification error rate of a model on a data set
float test(Data d, Model m)
{
    int n;
    float err = 0;

    // evaluate model performance using the MAP decision rule
    for( n = 0; n < d.NA; n++ )
        err += m.pA*indmvnpdf(d.A, n, d.D, d.NA, m.muA, m.sigA) <=
               m.pB*indmvnpdf(d.A, n, d.D, d.NA, m.muB, m.sigB);
    for( n = 0; n < d.NB; n++ )
        err += m.pA*indmvnpdf(d.B, n, d.D, d.NB, m.muA, m.sigA) >=
               m.pB*indmvnpdf(d.B, n, d.D, d.NB, m.muB, m.sigB);

    return err/(d.NA+d.NB);
}

// reads data from a file and stores each class in column-major order
// [x0 ... xN y0 ... yN z0 ... zN]
int readdata(char* fname, Data* set)
{
    FILE* fid = fopen(fname, "r");
    int N = 10000;
    int n;
    int nA = 0, nB = 0;
    char* c;
    double *x, *y, *z;

    if( fid == NULL )
    {
        printf("Unable to read %s\n", fname);
        return -1;
    }

    c = (char*)malloc(sizeof(char)*N);
    x = (double*)malloc(sizeof(double)*N);
    y = (double*)malloc(sizeof(double)*N);
    z = (double*)malloc(sizeof(double)*N);

    set->NA = 0;
    set->NB = 0;
    set->D  = 3;

    // read records of the form "A: (x y z)", growing the buffers as needed
    for( n = 0; ; n++ )
    {
        if( n == N )
        {
            N *= 2;
            c = (char*)realloc(c, sizeof(char)*N);
            x = (double*)realloc(x, sizeof(double)*N);
            y = (double*)realloc(y, sizeof(double)*N);
            z = (double*)realloc(z, sizeof(double)*N);
        }
        if( fscanf(fid, " %c: (%lf %lf %lf)", &c[n], &x[n], &y[n], &z[n]) != 4 )
            break;
        if( c[n] == 'A' ) set->NA++; else set->NB++;
    }
    fclose(fid);
    N = n;

    set->A = (double*)malloc(sizeof(double)*set->NA*3);
    set->B = (double*)malloc(sizeof(double)*set->NB*3);
    for( n = 0; n < N; n++ )
    {
        if( c[n] == 'A' )
        {
            set->A[nA]           = x[n];
            set->A[set->NA*1+nA] = y[n];
            set->A[set->NA*2+nA] = z[n];
            nA++;
        }
        else
        {
            set->B[nB]           = x[n];
            set->B[set->NB*1+nB] = y[n];
            set->B[set->NB*2+nB] = z[n];
            nB++;
        }
    }

    free(c);
    free(x);
    free(y);
    free(z);
    return 0;
}

int main(int argc, char* argv[])
{
    Data trainData, testData;
    Model model;

    if( argc < 3 )
    {
        printf("***********************************************\n");
        printf("Question 3\n");
        printf("Error: Not enough arguments.\n");
        printf("Please specify training and testing datasets:\n");
        printf("q3 Train.dat Test.dat\n");
        printf("***********************************************\n");
        return 0;
    }

    // read data
    if( readdata(argv[1], &trainData) == -1 || readdata(argv[2], &testData) == -1 )
        return 0;

    // train model
    model = train(trainData);

    // test model
    printf("Train set: %2.2f%% correct\n", (1-test(trainData, model))*100);
    printf("Test set: %2.2f%% correct\n", (1-test(testData, model))*100);

    free(model.muA);
    free(model.muB);
    free(model.sigA);
    free(model.sigB);
    free(trainData.A);
    free(trainData.B);
    free(testData.A);
    free(testData.B);

    return 0;
}
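
// Example build-and-run commands (a minimal sketch, assuming GCC on a system
// where the math library must be linked explicitly and the source is saved as
// q3.c; the data file names are simply the ones from the usage message above):
//
//   gcc -O2 -o q3 q3.c -lm
//   ./q3 Train.dat Test.dat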