diff options
Diffstat (limited to 'misc/vqtrain.c')
| -rw-r--r-- | misc/vqtrain.c | 399 |
1 files changed, 0 insertions, 399 deletions
diff --git a/misc/vqtrain.c b/misc/vqtrain.c deleted file mode 100644 index 59e88f1..0000000 --- a/misc/vqtrain.c +++ /dev/null @@ -1,399 +0,0 @@ -/*--------------------------------------------------------------------------*\ - - FILE........: VQTRAIN.C - AUTHOR......: David Rowe - DATE CREATED: 23/2/95 - - This program trains vector quantisers using K dimensional Lloyd-Max - method. - -\*--------------------------------------------------------------------------*/ - -/* - Copyright (C) 2009 David Rowe - - All rights reserved. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License version 2.1, as - published by the Free Software Foundation. This program is - distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with this program; if not, see <http://www.gnu.org/licenses/>. -*/ - -/*-----------------------------------------------------------------------*\ - - INCLUDES - -\*-----------------------------------------------------------------------*/ - -#include <assert.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <math.h> -#include <ctype.h> -#include <getopt.h> -#include <time.h> - -/*-----------------------------------------------------------------------*\ - - DEFINES - -\*-----------------------------------------------------------------------*/ - -#define DELTAQ 0.005 /* quitting distortion */ -#define MAX_STR 80 /* maximum string length */ - -/*-----------------------------------------------------------------------*\ - - FUNCTION PROTOTYPES - -\*-----------------------------------------------------------------------*/ - -void zero(float v[], int k); -void acc(float v1[], float v2[], int k); -void norm(float v[], int k, long n); -long quantise(float cb[], float vec[], int k, int m, int st, int en, float *beste, float *se); - -/*-----------------------------------------------------------------------* \ - - MAIN - -\*-----------------------------------------------------------------------*/ - -int main(int argc, char *argv[]) { - long k,m; /* dimension and codebook size */ - float *vec; /* current vector */ - float *cb; /* vector codebook */ - float *cent; /* centroids for each codebook entry */ - long *n; /* number of vectors in this interval */ - long J; /* number of vectors in training set */ - long i,j; - long ind; /* index of current vector */ - float e; /* squared error for current vector */ - float se; /* squared error for this iteration */ - float var,var_1; /* current and previous iterations distortion */ - float delta; /* improvement in distortion */ - long noutliers[3];/* number of vectors quantisers with > 3*sd */ - FILE *ftrain; /* file containing training set */ - FILE *fvq; /* file containing vector quantiser */ - int ret; - float deltaq_stop = DELTAQ; - FILE *fres = NULL; - int st = -1; - int en = -1; - int init_rand = 0; - - int o = 0; - int opt_idx = 0; - while( o != -1 ) { - static struct option long_opts[] = { - {"help", no_argument, 0, 'h'}, - {"residual", required_argument, 0, 'r'}, - {"stop", required_argument, 0, 's'}, - {"st", required_argument, 0, 't'}, - {"en", required_argument, 0, 'e'}, - {"rand", no_argument, 0, 'i'}, - {0, 0, 0, 0} - }; - - o = getopt_long(argc,argv,"hr:s:t:e:",long_opts,&opt_idx); - - switch(o) { - case 'r': - fres = fopen(optarg,"wb"); assert(fres != NULL); - //fprintf(stderr, "writing res to : %s \n", optarg); - break; - case 's': - deltaq_stop = atof(optarg); - //fprintf(stderr, "deltaq_stop :%f\n", deltaq_stop); - break; - case 't': - st = atoi(optarg); - break; - case 'e': - en = atoi(optarg); - break; - case 'i': - init_rand = 1; - break; - case 'h': - case '?': - goto helpmsg; - break; - } - } - int dx = optind; - - if ((argc - dx) < 4) { - fprintf(stderr, "Too few arguments\n"); - helpmsg: - fprintf(stderr, "usage: %s [Options] TrainFile.f32 K(dimension) M(codebook size) VQFile.f32\n", argv[0]); - fprintf(stderr, " -r --residual VQResidualErrorFile.f32\n"); - fprintf(stderr, " -s --stop StopDelta\n"); - fprintf(stderr, " --st Kst start vector element for error calculation (default 0)\n"); - fprintf(stderr, " --en Ken end vector element for error calculation (default K-1)\n"); - fprintf(stderr, " --rand use random sampling for initial VQ population\n"); - exit(1); - } - - /* Open training file */ - ftrain = fopen(argv[dx],"rb"); - if (ftrain == NULL) { - printf("Error opening training database file: %s\n",argv[dx]); - exit(1); - } - - /* determine k and m, and allocate arrays */ - k = atol(argv[dx+1]); - m = atol(argv[dx+2]); - - /* default to measuring error on entire vector */ - if (st == -1) st = 0; - if (en == -1) en = k-1; - - printf("vector dimension K=%ld codebook size M=%ld ", k, m); - vec = (float*)malloc(sizeof(float)*k); - cb = (float*)malloc(sizeof(float)*k*m); - cent = (float*)malloc(sizeof(float)*k*m); - n = (long*)malloc(sizeof(long)*m); - if (vec == NULL || cb == NULL || cent == NULL || n == NULL) { - printf("Error in malloc.\n"); - exit(1); - } - - /* determine size of training set */ - J = 0; zero(cent, k); - while(fread(vec, sizeof(float), k, ftrain) == (size_t)k) { - J++; - acc(cent, vec, k); - } - printf("J=%ld vectors in training set\n", J); - - /* Lets measure 0 bit VQ (i.e. mean of training set) as starting point */ - norm(cent, k, J); - memcpy(cb, cent, k*sizeof(float)); - se = 0.0; - rewind(ftrain); - for(i=0; i<J; i++) { - ret = fread(vec, sizeof(float), k, ftrain); - assert(ret == k); - quantise(cb, vec, k, 1, st, en, &e, &se); - } - var = se/(J*(en-st+1)); - printf("\r It: 0, var: %f sd: %f\n", var, sqrt(var)); - - /* set up initial codebook state from samples of training set */ - if (init_rand) srand(time(NULL)); - for(i=0; i<m; i++) { - if (init_rand) - j = J*(float)rand()/RAND_MAX; - else - j = i*(J/m); - fseek(ftrain, j*k*sizeof(float), SEEK_SET); - ret = fread(&cb[i*k], sizeof(float), k, ftrain); - assert(ret == k); - } - - /* main loop */ - j = 1; - do { - var_1 = var; - - /* zero centroids */ - for(i=0; i<m; i++) { - zero(¢[i*k], k); - n[i] = 0; - } - - /* quantise training set */ - se = 0.0; noutliers[0] = noutliers[1] = noutliers[2] = 0; - rewind(ftrain); - for(i=0; i<J; i++) { - ret = fread(vec, sizeof(float), k, ftrain); - assert(ret == k); - ind = quantise(cb, vec, k, m, st, en, &e, &se); - n[ind]++; - acc(¢[ind*k], vec, k); - if (sqrt(e/(en-st+1)) > 1.0) noutliers[0]++; - if (sqrt(e/(en-st+1)) > 2.0) noutliers[1]++; - if (sqrt(e/(en-st+1)) > 3.0) noutliers[2]++; - } - var = se/(J*(en-st+1)); - delta = (var_1-var)/var; - int n_min = J; - int n_max = 0; - for(i=0; i<m; i++) { - if (n[i] < n_min) n_min = n[i]; - if (n[i] > n_max) n_max = n[i]; - } - printf("\r It: %2ld, var: %5f sd: %f outliers > 1/2/3 dB = %3.2f/%3.2f/%3.2f Delta = %5.4f %d %d\n", j, var, sqrt(var), - (float)noutliers[0]/J, (float)noutliers[1]/J, (float)noutliers[2]/J, delta, n_min, n_max); - j++; - - /* determine new codebook from centroids */ - if (delta > deltaq_stop) - for(i=0; i<m; i++) { - if (n[i] != 0) { - norm(¢[i*k], k, n[i]); - memcpy(&cb[i*k], ¢[i*k], k*sizeof(float)); - } - } - - } while (delta > deltaq_stop); - - /* save VQ to disk */ - fvq = fopen(argv[dx+3],"wt"); - if (fvq == NULL) { - printf("Error opening VQ file: %s\n",argv[dx+3]); - exit(1); - } - - fwrite(cb, sizeof(float), m*k, fvq); - - /* optionally output residual error for next stage VQ */ - if (fres != NULL) { - float res[k]; - rewind(ftrain); - for(j=0; j<J; j++) { - ret = fread(vec, sizeof(float), k, ftrain); - ind = quantise(cb, vec, k, m, st, en, &e, &se); - for(i=0; i<k; i++) { - res[i] = vec[i] - cb[k*ind+i]; - } - fwrite(res, sizeof(float), k, fres); - } - fclose(fres); - } - - fclose(fvq); - fclose(ftrain); - free(vec); - free(n); - - return 0; -} - -/*-----------------------------------------------------------------------*\ - - FUNCTIONS - -\*-----------------------------------------------------------------------*/ - -/*-----------------------------------------------------------------------*\ - - FUNCTION....: zero() - - AUTHOR......: David Rowe - DATE CREATED: 23/2/95 - - Zeros a vector of length k. - -\*-----------------------------------------------------------------------*/ - -void zero(float v[], int k) -/* float v[]; ptr to start of vector */ -/* int k; lngth of vector */ -{ - int i; - - for(i=0; i<k; i++) - v[i] = 0.0; -} - -/*---------------------------------------------------------------------------*\ - - FUNCTION....: acc() - - AUTHOR......: David Rowe - DATE CREATED: 23/2/95 - - Adds k dimensional vectors v1 to v2 and stores the result back in v1. - -\*---------------------------------------------------------------------------*/ - -void acc(float v1[], float v2[], int k) -/* float v1[]; ptr to start of vector to accumulate */ -/* float v2[]; ptr to start of vector to add */ -/* int k; dimension of vectors */ -{ - int i; - - for(i=0; i<k; i++) - v1[i] += v2[i]; -} - -/*---------------------------------------------------------------------------*\ - - FUNCTION....: norm() - - AUTHOR......: David Rowe - DATE CREATED: 23/2/95 - - Divides each element in k dimensional vector v by n. - -\*---------------------------------------------------------------------------*/ - -void norm(float v[], int k, long n) -/* float v[]; ptr to start of vector */ -/* int k; dimension of vectors */ -/* int n; normalising factor */ -{ - int i; - - assert(n != 0); - for(i=0; i<k; i++) - v[i] /= n; -} - -/*---------------------------------------------------------------------------*\ - - FUNCTION....: quantise() - AUTHOR......: David Rowe - DATE CREATED: 23/2/95 - - Quantises vec by choosing the nearest vector in codebook cb, and - returns the vector index. The squared error of the quantised vector - is added to se. - -\*---------------------------------------------------------------------------*/ - -long quantise(float cb[], // current VQ codebook - float vec[], // cb[][k]; current VQ codebook - int k, // dimension of vectors - int m, // size of codebook - int st, // start sample of vector to use in error calc - int en, // end sample of vector to use in error calc - float *beste, // current squared error - float *se) // accumulated squared error -{ - long besti; /* best index so far */ - long j; - int i; - float diff,e; - - besti = 0; - *beste = 1E32; - for(j=0; j<m; j++) { - e = 0.0; - for(i=st; i<=en; i++) { - diff = cb[j*k+i]-vec[i]; - e += diff*diff; - } - if (e < *beste) { - *beste = e; - besti = j; - } - } - - *se += *beste; - - return(besti); -} - |
