aboutsummaryrefslogtreecommitdiff
path: root/octave/melvq.m
diff options
context:
space:
mode:
authorAuthor Name <[email protected]>2023-07-07 12:20:59 +0930
committerDavid Rowe <[email protected]>2023-07-07 12:29:06 +0930
commitac7c48b4dee99d4c772f133d70d8d1b38262fcd2 (patch)
treea2d0ace57a9c0e2e5b611c4987f6fed1b38b81e7 /octave/melvq.m
shallow zip-file copy from codec2 e9d726bf20
Diffstat (limited to 'octave/melvq.m')
-rw-r--r--octave/melvq.m165
1 files changed, 165 insertions, 0 deletions
diff --git a/octave/melvq.m b/octave/melvq.m
new file mode 100644
index 0000000..1e41b6e
--- /dev/null
+++ b/octave/melvq.m
@@ -0,0 +1,165 @@
+% melvq.m
+% David Rowe Aug 2015
+%
+% Experimenting with VQ design for mel LSPs, also handy VQ searching routines
+
+1;  % first statement is not a function definition, so Octave treats this file as a script that defines several functions
+
+% train up multi-stage VQ
+% ~/codec2-dev/build_linux/src$ sox -r 8000 -s -2 ../../wav/all.wav -t raw -r 8000 -s -2 - sinc 300 sinc -2600 | ./c2sim - --lpc 6 --lpcpf --lspmel --dump all -o - | play -t raw -r 8000 -s -2 - vol 3
+%
+% octave:> load ../build_linux/src/all_mel.txt
+% octave:> melvq; vq = trainvq(all_mel, 64, 3);
+% octave:> save vq
+
+function vq = trainvq(training_data, Nvec, stages, city_en=0)
+  % Train a multi-stage vector quantiser with k-means.
+  %
+  %   training_data  training vectors, one per row
+  %   Nvec           number of clusters requested from kmeans for each stage
+  %   stages         number of stages to train
+  %   city_en        when true, kmeans is run with the 'cityblock' distance
+  %
+  % Returns vq, where vq(:,:,s) holds the kmeans centers of stage s.  Every
+  % stage after the first is trained on the residual left by the stage
+  % before it.
+
+  vq = [];
+  for i=1:stages
+    if city_en
+      [idx centers] = kmeans(training_data, Nvec, 'DISTANCE', 'cityblock');
+    else
+      [idx centers] = kmeans(training_data, Nvec);
+    end
+
+    % Residual is "data minus codevector".  mbest() builds its later-stage
+    % targets as input minus the selected codevectors, so the next stage has
+    % to be trained on the same sign; training on the negated residual would
+    % give codebooks that model the mirror image of what is searched.
+    quant_error = training_data - centers(idx,:);
+
+    % the printed statistic does not depend on the sign of the residual
+    printf("mse stage %d: %f\n", i, mean(std(quant_error)));
+
+    training_data = quant_error;
+    vq(:,:,i) = centers;
+  end
+
+end
+
+function [mse_list index_list] = search_vq(vq, target, m)
+  % Find the m codebook rows closest to target.
+  %
+  %   vq      codebook, one vector per row
+  %   target  row vector to match, same number of columns as vq
+  %   m       number of matches to return
+  %
+  % Returns row vectors of length m, ordered from best to worst match:
+  %   mse_list    sum of squared differences between target and each match
+  %   index_list  row number in vq of each match
+
+  [Nvec order] = size(vq);
+
+  if m > Nvec
+    error("search_vq: m (%d) is larger than the codebook size (%d)", m, Nvec);
+  end
+
+  % squared error against every codebook row at once; bsxfun expands the
+  % single target row across the Nvec rows of vq
+
+  mse = sum(bsxfun(@minus, target, vq) .^ 2, 2)';
+
+  % sort and keep top m matches
+
+  [mse_list index_list] = sort(mse);
+
+  mse_list = mse_list(1:m);
+  index_list = index_list(1:m);
+
+endfunction
+
+
+% Search multi-stage VQ, retaining m best candidates at each stage
+
+function [res output_vecs ind] = mbest(vqset, input_vecs, m)
+  % Quantise each row of input_vecs with a multi-stage VQ, keeping the m
+  % best candidate paths after every stage.
+  %
+  %   vqset       codebooks; vqset(:,:,s) is the codebook of stage s
+  %   input_vecs  vectors to quantise, one per row
+  %   m           number of candidates retained at each stage
+  %
+  % Returns, with one row per input vector:
+  %   res          residual: input minus the sum of the chosen codevectors
+  %   output_vecs  quantised vector: sum of the chosen codevectors
+  %   ind          chosen codebook row for each stage
+
+  [Nvec order stages] = size(vqset);
+  Ninput = size(input_vecs, 1);
+
+  if m > Nvec
+    error("mbest: m (%d) is larger than the codebook size (%d)", m, Nvec);
+  end
+
+  % preallocate the outputs rather than growing them one row per input
+
+  res = zeros(Ninput, order);          % residual error after VQ
+  output_vecs = zeros(Ninput, order);  % quantised output vectors
+  ind = zeros(Ninput, stages);         % index of vqs
+
+  for i=1:Ninput
+
+    % first stage, find mbest candidates.  Each cand_list row is
+    % [error index_stage_1 ... index_stage_s]; search_vq returns its
+    % matches already ordered by error so no extra sort is needed here.
+
+    [mse_list index_list] = search_vq(vqset(:,:,1), input_vecs(i,:), m);
+    cand_list = [mse_list' index_list'];
+
+    % subsequent stages ...........
+
+    for s=2:stages
+
+      % path through stages 1..s-1 for each of the m surviving candidates
+
+      prev_indexes = cand_list(:,2:s);
+
+      % compute m targets for this stage: the input minus the codevectors
+      % already selected along each candidate's path
+
+      target = zeros(m, order);
+      for t=1:m
+        target(t,:) = input_vecs(i,:);
+        for v=1:s-1
+          target(t,:) -= vqset(prev_indexes(t,v),:,v);
+        end
+      end
+
+      % search stage s using m targets from stage s-1
+      % with m targets, we do m searches which return the m best possibilities
+      % so we get a matrix with one row per candidate, m*m rows total
+      % prev_indexes provides us with the path through the VQs for each candidate row
+
+      avq = vqset(:,:,s);
+      cand_list = zeros(m*m, s+1);
+      for t=1:m
+        [mse_list index_list] = search_vq(avq, target(t,:), m);
+        x = ones(m,1)*prev_indexes(t,:);
+        rows = (t-1)*m + (1:m);
+        cand_list(rows,:) = [mse_list' x index_list'];
+      end
+
+      % sort into m best rows
+
+      cand_list = sortrows(cand_list,1);
+      cand_list = cand_list(1:m,:);
+
+    end
+
+    % final residual and quantised vector along the best path
+
+    best = cand_list(1,2:1+stages);
+    resid = input_vecs(i,:);
+    out = zeros(1,order);
+    for v=1:stages
+      resid -= vqset(best(v),:,v);
+      out += vqset(best(v),:,v);
+    end
+    res(i,:) = resid;
+    output_vecs(i,:) = out;
+    ind(i,:) = best;
+  end
+
+endfunction
+
+
+% Quantises a set of mel-lsps and saves back to disk so they can be read in by c2sim
+% assumes we have a vq saved to disk called vq
+%
+% ~/codec2-dev/build_linux/src$ sox -r 8000 -s -2 ../../wav/vk5qi.wav -t raw -r 8000 -s -2 - sinc 300 sinc -2600 | ./c2sim - --lpc 6 --lpcpf --lspmel --dump vk5qi -o - | play -t raw -r 8000 -s -2 - vol 3
+%
+% octave:> test_run("vk5qi")
+%
+% ~/codec2-dev/build_linux/src$ sox -r 8000 -s -2 ../../wav/vk5qi.wav -t raw -r 8000 -s -2 - sinc 300 sinc -2600 | ./c2sim - --lpc 6 --lpcpf --phase0 --dec 4 --postfilter --lspmel --lspmelread ../../octave/vk5qi_mel_.out -o - | play -t raw -r 8000 -s -2 - vol 3
+
+function ind = test_run(samplename)
+  % Quantise the mel-LSP vectors of one sample with the saved VQ and write
+  % the quantised vectors to "<samplename>_mel_.out".
+  %
+  %   samplename  base name of the sample; the input vectors are loaded from
+  %               ../build_linux/src/<samplename>_mel.txt
+  %
+  % Returns ind, the VQ indexes chosen by mbest() for every input vector.
+  % The codebook is loaded from the file "vq" in the current directory.
+
+  more off;
+  input_vecs_name = sprintf("../build_linux/src/%s_mel.txt", samplename);
+  input_vecs_name
+  mel = load(input_vecs_name);
+  load vq;
+  [res mel_ ind] = mbest(vq, mel, 5);
+  mean(std(res))
+
+  output_vecs_name = sprintf("%s_mel_.out", samplename);
+  [fmel_, msg] = fopen(output_vecs_name,"wb");
+  if fmel_ < 0
+    error("test_run: cannot open %s for writing: %s", output_vecs_name, msg);
+  end
+
+  % make sure the file is closed even if the write fails
+  unwind_protect
+    % fwrite walks a matrix column by column, so writing the transpose
+    % stores the vectors one row of mel_ after another
+    fwrite(fmel_, mel_', "float32");
+  unwind_protect_cleanup
+    fclose(fmel_);
+  end_unwind_protect
+end
+
+%ind = test_run("hts1a");
+
+%load "../build_linux/src/all_mel.txt"
+%vq = trainvq(all_mel, 64, 3);
+%save vq;
+
+% [X] save text file of "vq quantised mels"
+% [X] load back into c2sim at run time
+% [X] train on continuous mels
+% [X] sorting/stability
+% [X] see how it sounds
+% [X] Goal is to get VQ sounding OK, similar to UQ at 20 or 40ms dec,
+% [X] sig better than current 700
+% [X] check all indexes used with hist