diff options
| author | Marin Ivanov <[email protected]> | 2025-07-25 10:17:14 +0300 |
|---|---|---|
| committer | Marin Ivanov <[email protected]> | 2026-01-18 20:09:26 +0200 |
| commit | 0168586485e6310c598713c911b1dec5618d61a1 (patch) | |
| tree | 6aabc2a12ef8fef70683f5389bea00f948015f77 /octave/melvq.m | |
* codec2 cut-down version 1.2.0
* Remove codebook and generation of sources
* remove c2dec c2enc binaries
* prepare for emscripten
Diffstat (limited to 'octave/melvq.m')
| -rw-r--r-- | octave/melvq.m | 165 |
1 files changed, 165 insertions, 0 deletions
diff --git a/octave/melvq.m b/octave/melvq.m new file mode 100644 index 0000000..1e41b6e --- /dev/null +++ b/octave/melvq.m @@ -0,0 +1,165 @@ +% melvq.m +% David Rowe Aug 2015 +% +% Experimenting with VQ design for mel LSPs, also handy VQ searching routines + +1; + +% train up multi-stage VQ +% ~/codec2-dev/build_linux/src$ sox -r 8000 -s -2 ../../wav/all.wav -t raw -r 8000 -s -2 - sinc 300 sinc -2600 | ./c2sim - --lpc 6 --lpcpf --lspmel --dump all -o - | play -t raw -r 8000 -s -2 - vol 3 +% +% octave:> load ../build_linux/src/all_mel.txt +% octave:> melvq; vq = trainvq(all_mel, 64, 3); +% octave:> save vq + +function vq = trainvq(training_data, Nvec, stages, city_en=0) + + vq = []; + for i=1:stages + if city_en + [idx centers] = kmeans(training_data, Nvec, 'DISTANCE', 'cityblock'); + else + [idx centers] = kmeans(training_data, Nvec); + end + quant_error = centers(idx,:) - training_data; + printf("mse stage %d: %f\n", i, mean(std(quant_error))); + training_data = quant_error; + vq(:,:,i) = centers; + end + +end + +function [mse_list index_list] = search_vq(vq, target, m) + + [Nvec order] = size(vq); + + mse = zeros(1, Nvec); + + % find mse for each vector + + for i=1:Nvec + mse(i) = sum((target - vq(i,:)) .^2); + end + + % sort and keep top m matches + + [mse_list index_list ] = sort(mse); + + mse_list = mse_list(1:m); + index_list = index_list(1:m); + +endfunction + + +% Search multi-stage VQ, retaining m best candidates at each stage + +function [res output_vecs ind] = mbest(vqset, input_vecs, m) + + [Nvec order stages] = size(vqset); + [Ninput tmp] = size(input_vecs); + + res = []; % residual error after VQ + output_vecs = []; % quantised output vectors + ind = []; % index of vqs + + for i=1:Ninput + + % first stage, find mbest candidates + + [mse_list index_list] = search_vq(vqset(:,:,1), input_vecs(i,:), m); + cand_list = [mse_list' index_list']; + cand_list = sortrows(cand_list,1); + + % subsequent stages ........... + + for s=2:stages + + % compute m targets for next stage, and update path + + prev_indexes = zeros(m,s-1); + for t=1:m + target(t,:) = input_vecs(i,:); + for v=1:s-1 + target(t,:) -= vqset(cand_list(t,v+1),:,v); + end + prev_indexes(t,:) = cand_list(t,2:s); + end + + % search stage s using m targets from stage s-1 + % with m targets, we do m searches which return the m best possibilities + % so we get a matrix with one row per candidate, m*m rows total + % prev_indexes provides us with the path through the VQs for each candidate row + + avq = vqset(:,:,s); + cand_list = []; + for t=1:m + [mse_list index_list] = search_vq(avq, target(t,:), m); + x = ones(m,1)*prev_indexes(t,:); + cand_row = [mse_list' x index_list']; + cand_list = [cand_list; cand_row]; + end + + % sort into m best rows + + cand_list = sortrows(cand_list,1); + cand_list = cand_list(1:m,:); + + end + + % final residual + target(1,:) = input_vecs(i,:); + out = zeros(1,order); + for v=1:stages + target(1,:) -= vqset(cand_list(1,v+1),:,v); + out += vqset(cand_list(1,v+1),:,v); + end + res = [res; target(1,:)]; + output_vecs = [output_vecs; out]; + ind = [ind; cand_list(1,2:1+stages)]; + end + +endfunction + + +% Quantises a set of mel-lsps and saves back to disk so they can be read in by c2sim +% assumes we have a vq saved to disk called vq +% +% ~/codec2-dev/build_linux/src$ sox -r 8000 -s -2 ../../wav/vk5qi.wav -t raw -r 8000 -s -2 - sinc 300 sinc -2600 | ./c2sim - --lpc 6 --lpcpf --lspmel --dump vk5qi -o - | play -t raw -r 8000 -s -2 - vol 3 +% +% octave:> test_run("vk5qi") +% +% ~/codec2-dev/build_linux/src$ sox -r 8000 -s -2 ../../wav/vk5qi.wav -t raw -r 8000 -s -2 - sinc 300 sinc -2600 | ./c2sim - --lpc 6 --lpcpf --phase0 --dec 4 --postfilter --lspmel --lspmelread ../../octave/vk5qi_mel_.out -o - | play -t raw -r 8000 -s -2 - vol 3 + +function ind = test_run(samplename) + + more off; + input_vecs_name = sprintf("../build_linux/src/%s_mel.txt", samplename); + input_vecs_name + mel = load(input_vecs_name); + load vq; + [res mel_ ind] = mbest(vq, mel, 5); + mean(std(res)) + + output_vecs_name = sprintf("%s_mel_.out", samplename); + fmel_ = fopen(output_vecs_name,"wb"); + [r c] = size(mel_); + for i=1:r + fwrite(fmel_, mel_(i,:), "float32"); + end + fclose(fmel_); +end + +%ind = test_run("hts1a"); + +%load "../build_linux/src/all_mel.txt" +%vq = trainvq(all_mel, 64, 3); +%save vq; + +% [X] save text file of "vq quantised mels" +% [X] load back into c2sim at run time +% [X] train on continuous mels +% [X] sorting/stability +% [X] see how it sounds +% [X] Goal is to get VQ sounding OK, similar to UQ at 20 or 40ms dec, +% [X] sig better than current 700 +% [X] check all indexes used with hist |
