You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1751 lines
46 KiB
1751 lines
46 KiB
% newamp.m
|
|
%
|
|
% Copyright David Rowe 2015
|
|
% This program is distributed under the terms of the GNU General Public License
|
|
% Version 2
|
|
%
|
|
% Library of Octave functions to explore new ideas in amplitude
|
|
% (spectral envelope) modelling. See newamp_fby (frame by frame
|
|
% analysis) and newamp_batch (batch processing for listening tests)
|
|
%
|
|
% Code here to support a bunch of experimental ideas, many that didn't work out.
|
|
|
|
1;
|
|
melvq; % mbest VQ functions
|
|
|
|
% --------------------------------------------------------------------------------
|
|
% Functions used by rate K mel work
|
|
% --------------------------------------------------------------------------------
|
|
|
|
function y = lanczos2(x)
|
|
y = sinc(x).*sinc(x/2);
|
|
endfunction
|
|
|
|
function y = interp_lanczos(xp, yp, xp_max, x)
|
|
|
|
y = zeros(1,length(x));
|
|
k = 1;
|
|
for i=1:length(x)
|
|
% find closest sample in xp just greater than xi
|
|
xi = x(i);
|
|
while (xp(k) <= xi) && (k < length(xp)-2)
|
|
k++;
|
|
end
|
|
|
|
% we'd like to use k-2 .. k+2, but we need to stay inside limits of xp
|
|
|
|
k_st = k - 2; k_st = max(1,k_st);
|
|
k_en = k + 2; k_en = min(length(xp),k_en);
|
|
% printf("i: %d xi: %f k: %d k_st: %d k_en: %d\n", i, xi, k, k_st, k_en);
|
|
|
|
% map frequencies to x in -2 ... + 2
|
|
|
|
delta = xp(2) - xp(1);
|
|
xl = (xp(k_st:k_en) - xi)/delta;
|
|
|
|
y(i) = lanczos2(xl) * yp(k_st:k_en)';
|
|
|
|
end
|
|
|
|
endfunction
|
|
|
|
|
|
% General 2nd order parabolic interpolator. Used splines orginally,
|
|
% but this is much simpler and we don't need much accuracy. Given two
|
|
% vectors of points xp and yp, find interpolated values y at points x
|
|
%
|
|
|
|
% If a point in x is less than the smallest point in xp, we linearly
|
|
% interpolate down to (0,0). If a point in x is greater than the
|
|
% greatest value in xp, we linearly interpolate down to (xp_max, 0)
|
|
|
|
function y = interp_para(xp, yp, xp_max, x)
|
|
assert( (length(xp) >=3) && (length(yp) >= 3) );
|
|
|
|
y = zeros(1,length(x));
|
|
k = 1;
|
|
for i=1:length(x)
|
|
xi = x(i);
|
|
|
|
% k is index into xp of where we start 3 points used to form parabola
|
|
|
|
while (xp(k) < xi) && (k < length(xp))
|
|
k++;
|
|
end
|
|
|
|
%printf("xi: %f k = %d\n", xi, k);
|
|
if k == 1
|
|
% linear interpolation at low end
|
|
x1 = 0; y1 = 0;
|
|
x2 = xp(k); y2 = yp(k);
|
|
b = (y2-y1)/(x2-x1);
|
|
y(i) = b*(xi-x2) + y2;
|
|
%printf("lin1 k: %d i: %d xi: %f x1: %f y1: %f\n", k, i, xi, x1, y1);
|
|
elseif k < length(xp)
|
|
% parabolic interpolation
|
|
x1 = xp(k-1); y1 = yp(k-1);
|
|
x2 = xp(k); y2 = yp(k);
|
|
x3 = xp(k+1); y3 = yp(k+1);
|
|
a = ((y3-y2)/(x3-x2)-(y2-y1)/(x2-x1))/(x3-x1);
|
|
b = ((y3-y2)/(x3-x2)*(x2-x1)+(y2-y1)/(x2-x1)*(x3-x2))/(x3-x1);
|
|
y(i) = a*(xi-x2)^2 + b*(xi-x2) + y2;
|
|
%printf("para1 k: %d i: %d xi: %f x1: %f y1: %f\n", k, i, xi, x1, y1);
|
|
elseif (k == length(xp)) && (xi < xp(k))
|
|
% parabolic interpolation, but shift xp points back by 1
|
|
x1 = xp(k-2); y1 = yp(k-2);
|
|
x2 = xp(k-1); y2 = yp(k-1);
|
|
x3 = xp(k); y3 = yp(k);
|
|
a = ((y3-y2)/(x3-x2)-(y2-y1)/(x2-x1))/(x3-x1);
|
|
b = ((y3-y2)/(x3-x2)*(x2-x1)+(y2-y1)/(x2-x1)*(x3-x2))/(x3-x1);
|
|
y(i) = a*(xi-x2)^2 + b*(xi-x2) + y2;
|
|
%printf("para2 k: %d i: %d xi: %f x1: %f y1: %f\n", k, i, xi, x1, y1);
|
|
elseif k == length(xp)
|
|
% linear interpolation at high end
|
|
x1 = xp(k); y1 = yp(k);
|
|
x2 = xp_max; y2 = 0;
|
|
b = (y2-y1)/(x2-x1);
|
|
y(i) = b*(xi-x1) + y1;
|
|
%printf("lin2 k: %d i: %d xi: %f x1: %f y1: %f\n", k, i, xi, x1, y1);
|
|
end
|
|
|
|
end
|
|
endfunction
|
|
|
|
|
|
% choose largest sample in band, idea is we care more about finding
|
|
% peaks, can handle some error in frequency. x are non linear
|
|
% (arbitrary) sampling points in kHz
|
|
|
|
function y = interp_largest(f0_Hz, AmdB, x_kHz)
|
|
L = length(AmdB);
|
|
x = x_kHz*1000;
|
|
y = zeros(1,length(x));
|
|
bw = x(2) - x(1);
|
|
k = 1;
|
|
|
|
for i=1:length(x)
|
|
|
|
% determine limits of this band
|
|
|
|
if i>1
|
|
bw = x(i) - x(i-1);
|
|
end
|
|
band_low = x(i) - bw/2; band_high = x(i) + bw/2;
|
|
|
|
% map band limits to harmonics
|
|
|
|
if x(i) < f0_Hz
|
|
m_low = m_high = 1;
|
|
else
|
|
m_low = round(band_low/f0_Hz); m_high = round(band_high/f0_Hz)-1;
|
|
m_low = max(1, m_low); m_high = min(L, m_high); m_high = max(m_low, m_high);
|
|
end
|
|
|
|
printf("L: %d f0: %f i: %d band_low: %f band_high: %f m_low: %d m_high: %d\n",L, f0_Hz, i, band_low, band_high, m_low, m_high);
|
|
% find max in band
|
|
|
|
y(i) = max(AmdB(m_low:m_high));
|
|
end
|
|
|
|
endfunction
|
|
|
|
% simple linear interpolator
|
|
|
|
function y = interp_linear(xp, yp, x)
|
|
assert( (length(xp) == 2) && (length(yp) == 2) );
|
|
|
|
m = (yp(2) - yp(1))/(xp(2) - xp(1));
|
|
c = yp(1) - m*xp(1);
|
|
|
|
y = zeros(1,length(x));
|
|
for i=1:length(x)
|
|
y(i) = m*x(i) + c;
|
|
end
|
|
endfunction
|
|
|
|
|
|
% quantise input sample to nearest value in table, optionally return binary code
|
|
|
|
function [quant_out best_i bits] = quantise(levels, quant_in)
|
|
|
|
% find closest quantiser level
|
|
|
|
best_se = 1E32;
|
|
for i=1:length(levels)
|
|
se = (levels(i) - quant_in)^2;
|
|
if se < best_se
|
|
quant_out = levels(i);
|
|
best_se = se;
|
|
best_i = i;
|
|
end
|
|
end
|
|
|
|
% convert index to binary bits
|
|
|
|
numbits = ceil(log2(length(levels)));
|
|
bits = zeros(1, numbits);
|
|
for b=1:numbits
|
|
bits(b) = bitand(best_i-1,2^(numbits-b)) != 0;
|
|
end
|
|
|
|
endfunction
|
|
|
|
|
|
% Quantisation functions for Wo in log freq domain
|
|
|
|
function index = encode_log_Wo(Wo, bits)
|
|
Wo_levels = 2.^bits;
|
|
Wo_min = 2*pi/160;
|
|
Wo_max = 2*pi/20;
|
|
|
|
norm = (log10(Wo) - log10(Wo_min))/(log10(Wo_max) - log10(Wo_min));
|
|
index = floor(Wo_levels * norm + 0.5);
|
|
index = max(index, 0);
|
|
index = min(index, Wo_levels-1);
|
|
endfunction
|
|
|
|
|
|
function Wo = decode_log_Wo(index, bits)
|
|
Wo_levels = 2.^bits;
|
|
Wo_min = 2*pi/160;
|
|
Wo_max = 2*pi/20;
|
|
|
|
step = (log10(Wo_max) - log10(Wo_min))/Wo_levels;
|
|
Wo = log10(Wo_min) + step*index;
|
|
|
|
Wo = 10 .^ Wo;
|
|
endfunction
|
|
|
|
|
|
% convert index to binary bits
|
|
|
|
function bits = index_to_bits(value, numbits)
|
|
levels = 2.^numbits;
|
|
bits = zeros(1, numbits);
|
|
for b=1:numbits
|
|
bits(b) = bitand(value,2^(numbits-b)) != 0;
|
|
end
|
|
end
|
|
|
|
|
|
function value = bits_to_index(bits, numbits)
|
|
value = 2.^(numbits-1:-1:0) * bits;
|
|
endfunction
|
|
|
|
|
|
% helper function to find polynomial coeffs for a parabola
|
|
|
|
function b = parabola_coeffs(x, y)
|
|
A = [(x.^2)' x' [1 1 1]']
|
|
b = inv(A)*y';
|
|
endfunction
|
|
|
|
|
|
% generate a zig-zag linear to square mapping matrix
|
|
|
|
function map = create_zigzag_map(nr,nc)
|
|
map = zeros(nr, nc);
|
|
|
|
state = 'zig';
|
|
r = c = 1;
|
|
|
|
for i=1:nr*nc
|
|
|
|
printf("%s r: %d c: %d i %d\n", state, r, c, i);
|
|
map(r,c) = i;
|
|
|
|
next_state = state;
|
|
if state == 'zig'
|
|
% move SE
|
|
c -= 1; r += 1;
|
|
if r > nr
|
|
r = nr; c+=2;
|
|
next_state = 'zag';
|
|
end
|
|
if c < 1
|
|
c = 1;
|
|
next_state = 'zag';
|
|
end
|
|
end
|
|
|
|
if state == 'zag'
|
|
% move SE
|
|
r -= 1; c +=1;
|
|
if c > nc
|
|
c = nc; r+=2;
|
|
next_state = 'zig';
|
|
end
|
|
if r < 1
|
|
r = 1;
|
|
next_state = 'zig';
|
|
end
|
|
end
|
|
state = next_state;
|
|
end
|
|
endfunction
|
|
|
|
|
|
% reshape matrix m as a vector v by reading out elements in zig-zag pattern
|
|
|
|
function v = mtov_zigzag(m)
|
|
[nr nc] = size(m);
|
|
map = zigzag(nr,nc)
|
|
v = zeros(1,nr*nc);
|
|
for r=1:nr
|
|
for c=1:nc
|
|
v(map(r,c)) = m(r,c);
|
|
end
|
|
end
|
|
endfunction
|
|
|
|
|
|
% extracts DCT information for rate K surface
|
|
|
|
function unwrapped_dcts = dct_blocks(surf, Nt=16)
|
|
[frames K] = size(surf);
|
|
|
|
% break into 160ms blocks, 2D DCT, truncate, IDCT
|
|
|
|
Nblocks = floor(frames/Nt);
|
|
unwrapped_dcts = zeros(Nblocks,Nt*K);
|
|
|
|
for n=1:Nblocks
|
|
st = (n-1)*Nt+1; en = st + Nt - 1;
|
|
D = dct2(surf(st:en,:));
|
|
unwrapped_dcts(n,:) = reshape(D',1,Nt*K);
|
|
end
|
|
endfunction
|
|
|
|
|
|
% Determines a map for quantising 2D DCT coeffs in order of rms value
|
|
% (ie std dev) of each coeff. Those coeffs with the greatest change
|
|
% need the most bits to quantise
|
|
|
|
function [map rms_map mx mx_ind unwrapped_dcts] = create_map_rms(rate_K_surface, nr, nc)
|
|
unwrapped_dcts = dct_blocks(rate_K_surface, nr);
|
|
[mx mx_ind] = sort(std(unwrapped_dcts));
|
|
mx_ind = fliplr(mx_ind); mx = fliplr(mx);
|
|
map = rms_map = zeros(nr,nc);
|
|
for i=1:nr*nc
|
|
r = floor((mx_ind(i)-1)/nc) + 1;
|
|
c = mx_ind(i) - (r-1)*nc;
|
|
%printf("%d %d %d\n", i, r, c);
|
|
map(r,c) = i;
|
|
rms_map(r,c) = mx(i);
|
|
end
|
|
endfunction
|
|
|
|
|
|
% plot histogram of each 2D DCT coeff, so we can get a feel for
|
|
% quantiser design
|
|
|
|
function plot_dct2_hists(rate_K_surface, nr, nc)
|
|
[map rms_map mx mx_ind unwrapped_dcts] = create_map_rms(rate_K_surface, nr, nc);
|
|
Ncoeff = nr*nc;
|
|
fign = 1; subplotn = 1;
|
|
close all; figure(fign); clf;
|
|
Nplot = 60;
|
|
for i=1:Nplot
|
|
subplot(5,4,subplotn);
|
|
d = unwrapped_dcts(:,mx_ind(i));
|
|
d = round(d/4);
|
|
hist(d);
|
|
subplotn++;
|
|
if (subplotn > 20) && (i != Nplot)
|
|
subplotn = 1;
|
|
fign++;
|
|
figure(fign); clf;
|
|
end
|
|
end
|
|
endfunction
|
|
|
|
|
|
% Gather run length data for each 2D DCT coeff, to see if run length encoding
|
|
% can help
|
|
|
|
function [run_length d]= plot_run_length(rate_K_surface, nr, nc)
|
|
[map rms_map mx mx_ind unwrapped_dcts] = create_map_rms(rate_K_surface, nr, nc);
|
|
Ncoeff = nr*nc;
|
|
[Nblocks tmp] = size(unwrapped_dcts);
|
|
|
|
% first get histogram of DCT values -----------------------------------
|
|
|
|
% some mild quantisation
|
|
|
|
unwrapped_dcts = round(unwrapped_dcts/4);
|
|
|
|
% note we only consider first half of DCT coeffs, unlikely to use all
|
|
|
|
d = [];
|
|
for i=1:Nblocks
|
|
d = [d unwrapped_dcts(i,mx_ind(1:Ncoeff/2))];
|
|
end
|
|
|
|
% note we remove outliers from plot as very low prob symbols
|
|
|
|
d = d(find(abs(d)<10));
|
|
figure(1); clf; [Wi, ii] = hist(d,-10:10,1); plot(ii,Wi);
|
|
length(d)
|
|
Wi = Wi(find(Wi > 0));
|
|
%sum(Wi)
|
|
%-log2(Wi)
|
|
%-Wi .* log2(Wi)
|
|
printf("bits/symbol: %2.2f\n", sum(-Wi .* log2(Wi)));
|
|
|
|
% now measure run lengths --------------------------------------------
|
|
|
|
run_length = zeros(21,Ncoeff);
|
|
state = 'idle';
|
|
|
|
for i=2:length(d)
|
|
|
|
next_state = state;
|
|
|
|
if state == 'idle'
|
|
if d(i-1) == d(i)
|
|
next_state = 'trac';
|
|
arun_length = 2;
|
|
else
|
|
run_length(d(i)+10, 1)++;
|
|
end
|
|
end
|
|
|
|
if state == 'trac'
|
|
if d(i-1) == d(i)
|
|
arun_length++;
|
|
else
|
|
next_state = 'idle';
|
|
run_length(d(i-1)+10, arun_length)++;
|
|
end
|
|
end
|
|
|
|
state = next_state;
|
|
|
|
end
|
|
|
|
figure(2); clf; mesh(run_length(:,1:10));
|
|
endfunction
|
|
|
|
|
|
% Design kmeans quantisers for each DCT coeff. This didn't work very well.
|
|
|
|
function [quantisers nbits] = design_quantisters_kmeans(rate_K_surface, nr, nc, nbits_max)
|
|
[map rms_map mx unwrapped_dcts] = create_map_rms(rate_K_surface, nr, nc);
|
|
nq = nr*nc;
|
|
quantisers = zeros(nq, 2^nbits_max); nbits = zeros(nq,1);
|
|
for i=1:nq
|
|
|
|
% work out number of levels for this quantiser such that it is a
|
|
% power of 2 for integer number of bits
|
|
|
|
nlevels = (2^nbits_max);
|
|
nbits(i) = round(log2(nlevels));
|
|
nlevels = 2 .^ nbits(i);
|
|
|
|
if i <= 100
|
|
printf("%d %d\n", i, nlevels);
|
|
[idx, centers] = kmeans(unwrapped_dcts(:,i), nlevels);
|
|
quantisers(i,1:nlevels) = sort(centers);
|
|
end
|
|
end
|
|
endfunction
|
|
|
|
|
|
% Uniform quantisers designed to fit limits of each DCT coeff
|
|
|
|
function [quantisers nlevels] = design_quantisters_uniform(rate_K_surface, nr, nc, nlevels_max)
|
|
[map rms_map mx mx_ind unwrapped_dcts] = create_map_rms(rate_K_surface, nr, nc);
|
|
|
|
nq = nr*nc;
|
|
quantisers = zeros(nq, nlevels_max); nlevels = zeros(nq, 1);
|
|
|
|
for i=1:nq
|
|
d = unwrapped_dcts(:,mx_ind(i));
|
|
d = floor(d/16);
|
|
q_min = floor(min(d));
|
|
q_max = ceil(max(d));
|
|
nlevels(i) = q_max-q_min+1;
|
|
quantisers(i,1:nlevels(i)) = 16*(q_min:q_max);
|
|
end
|
|
endfunction
|
|
|
|
|
|
% Determine a phase spectra from a magnitude spectra
|
|
% from http://www.dsprelated.com/showcode/20.php
|
|
% Haven't _quite_ figured out how this works but have to start somewhere ....
|
|
%
|
|
% TODO: we may be able to sample at a lower rate, like mWo
|
|
% but start with something that works
|
|
function [phase Gdbfk s Aw] = determine_phase(model, f, Nfft=512, ak)
|
|
Fs = 8000;
|
|
max_amp = 80;
|
|
L = min([model(f,2) max_amp-1]);
|
|
Wo = model(f,1);
|
|
|
|
sample_freqs_kHz = (Fs/1000)*[0:Nfft/2]/Nfft; % fft frequency grid (nonneg freqs)
|
|
Am = model(f,3:(L+2));
|
|
AmdB = 20*log10(Am);
|
|
rate_L_sample_freqs_kHz = (1:L)*Wo*4/pi;
|
|
Gdbfk = interp_lanczos(rate_L_sample_freqs_kHz, AmdB, Fs/(2*1000), sample_freqs_kHz);
|
|
|
|
% Gdbfk = resample_mask(model, f, mask_sample_freqs_kHz);
|
|
|
|
% optional input of aks for testing
|
|
|
|
if nargin == 4
|
|
Aw = 1 ./ fft(ak,Nfft);
|
|
Gdbfk = 20*log10(abs(Aw(1:Nfft/2+1)));
|
|
end
|
|
|
|
[phase s] = mag_to_phase(Gdbfk, Nfft);
|
|
|
|
endfunction
|
|
|
|
|
|
% Non linear sampling of frequency axis, reducing the "rate" is a
|
|
% first step before VQ
|
|
|
|
function mel = ftomel(fHz)
|
|
mel = floor(2595*log10(1+fHz/700)+0.5);
|
|
endfunction
|
|
|
|
|
|
function rate_K_sample_freqs_kHz = mel_sample_freqs_kHz(K, fstart_hz=100, fend_hz=0.95*4000)
|
|
mel_start = ftomel(fstart_hz); mel_end = ftomel(fend_hz);
|
|
step = (mel_end-mel_start)/(K-1);
|
|
mel = mel_start:step:mel_end;
|
|
rate_K_sample_freqs_Hz = 700*((10 .^ (mel/2595)) - 1);
|
|
rate_K_sample_freqs_kHz = rate_K_sample_freqs_Hz/1000;
|
|
endfunction
|
|
|
|
|
|
function plot_mel_sample_freqs(K, f_start_hz, f_end_hz)
|
|
rate_K_sample_freqs_kHz = mel_sample_freqs_kHz(K, f_start_hz, f_end_hz);
|
|
figure(1); clf;
|
|
plot(rate_K_sample_freqs_kHz,'+');
|
|
endfunction
|
|
|
|
function [rate_K_surface rate_K_sample_freqs_kHz] = resample_const_rate_f_mel(model, K, Fs=8000, interp_alg='lanc')
|
|
rate_K_sample_freqs_kHz = mel_sample_freqs_kHz(K, 100, 0.95*Fs/2);
|
|
rate_K_surface = resample_const_rate_f(model, rate_K_sample_freqs_kHz, Fs, interp_alg);
|
|
endfunction
|
|
|
|
|
|
% Resample Am from time-varying rate L=floor(pi/Wo) to fixed rate K. This can be viewed
|
|
% as a 3D surface with time, freq, and ampitude axis.
|
|
|
|
function [rate_K_surface rate_K_sample_freqs_kHz] = resample_const_rate_f(model, rate_K_sample_freqs_kHz, Fs, interp_alg='lanc')
|
|
|
|
% convert rate L=pi/Wo amplitude samples to fixed rate K
|
|
|
|
max_amp = 160;
|
|
[frames col] = size(model);
|
|
K = length(rate_K_sample_freqs_kHz);
|
|
rate_K_surface = zeros(frames, K);
|
|
|
|
for f=1:frames
|
|
Wo = model(f,1);
|
|
L = min([model(f,2) max_amp-1]);
|
|
Am = model(f,3:(L+2));
|
|
AmdB = 20*log10(Am);
|
|
|
|
clip_en = 0;
|
|
if clip_en
|
|
% clip between peak and peak -50dB, to reduce dynamic range
|
|
|
|
AmdB_peak = max(AmdB);
|
|
AmdB(find(AmdB < (AmdB_peak-50))) = AmdB_peak-50;
|
|
end
|
|
|
|
rate_L_sample_freqs_kHz = (1:L)*Wo*Fs/(2000*pi);
|
|
|
|
%rate_K_surface(f,:) = interp1([0 rate_L_sample_freqs_kHz (Fs/2000)], [AmdB(1) AmdB AmdB(L)], rate_K_sample_freqs_kHz, "spline");
|
|
if strcmp(interp_alg, 'para')
|
|
rate_K_surface(f,:) = interp_para(rate_L_sample_freqs_kHz, AmdB, Fs/(2*1000), rate_K_sample_freqs_kHz);
|
|
end
|
|
if strcmp(interp_alg, 'lanc')
|
|
rate_K_surface(f,:) = interp_lanczos(rate_L_sample_freqs_kHz, AmdB, Fs/(2*1000), rate_K_sample_freqs_kHz);
|
|
end
|
|
|
|
%printf("%d\n", f);
|
|
end
|
|
%printf("\n");
|
|
endfunction
|
|
|
|
|
|
function [rate_K_vec_corrected orig_error error nasty_error_log nasty_error_m_log] = correct_rate_K_vec(rate_K_vec, rate_K_sample_freqs_kHz, AmdB, AmdB_, K, Wo, L, Fs)
|
|
|
|
% aliasing correction --------------------------------------
|
|
|
|
% The mel sample rate decreases as frequency increases. Look for
|
|
% any regions above 1000Hz where we have missed definition of a
|
|
% spectral peak (formant) due to aliasing. Adjust the rate K
|
|
% sample levels to restore peaks. Theory is that correct
|
|
% definition of a formant is less important than the frequency of
|
|
% the formant. As long as we define a formant in that general
|
|
% frequency area it will sound OK.
|
|
|
|
Am_freqs_kHz = (1:L)*Wo*Fs/(2000*pi);
|
|
|
|
% Lets see where we have made an error
|
|
|
|
error = orig_error = AmdB(1:L) - AmdB_(1:L);
|
|
|
|
Ncorrections = 3; % maximum number of rate K samples to correct
|
|
error_thresh = 3; % only worry about errors larger than thresh
|
|
|
|
start_m = floor(L*1000/(Fs/2));
|
|
error(1:start_m) = 0; % first 1000Hz is densly sampled so ignore
|
|
nasty_error_m_log = []; nasty_error_log = [];
|
|
|
|
|
|
rate_K_vec_corrected = rate_K_vec;
|
|
for i=1:Ncorrections
|
|
[mx mx_m] = max(error);
|
|
|
|
if mx > error_thresh
|
|
nasty_error_log = [nasty_error_log mx];
|
|
nasty_error_m_log = [nasty_error_m_log mx_m];
|
|
|
|
% find closest rate K sample to nasty error
|
|
|
|
nasty_error_freq = mx_m*Wo*Fs/(2*pi*1000);
|
|
[tmp closest_k] = min(abs(rate_K_sample_freqs_kHz - nasty_error_freq));
|
|
rate_K_vec_corrected(closest_k) = AmdB(mx_m);
|
|
|
|
% zero out error in this region and look for another large error region
|
|
|
|
k = max(1, closest_k-1);
|
|
rate_K_prev_sample_kHz = rate_K_sample_freqs_kHz(k);
|
|
k = min(K, closest_k+1);
|
|
rate_K_next_sample_kHz = rate_K_sample_freqs_kHz(k);
|
|
|
|
[tmp st_m] = min(abs(Am_freqs_kHz - rate_K_prev_sample_kHz));
|
|
[tmp en_m] = min(abs(Am_freqs_kHz - rate_K_next_sample_kHz));
|
|
if closest_k == K
|
|
en_m = L;
|
|
end
|
|
error(st_m:en_m) = 0;
|
|
end
|
|
end
|
|
endfunction
|
|
|
|
|
|
% Take a rate K surface and convert back to time varying rate L
|
|
|
|
function [model_ AmdB_] = resample_rate_L(model, rate_K_surface, rate_K_sample_freqs_kHz, Fs=8000, interp_alg='lanc')
|
|
max_amp = 160; K = columns(rate_K_surface);
|
|
[frames col] = size(model);
|
|
|
|
AmdB_ = zeros(frames, max_amp);
|
|
model_ = zeros(frames, max_amp+2);
|
|
for f=1:frames
|
|
Wo = model(f,1);
|
|
L = model(f,2);
|
|
rate_L_sample_freqs_kHz = (1:L)*Wo*Fs/(2000*pi);
|
|
|
|
% back down to rate L
|
|
|
|
% dealing with end effects is an ongoing issue.....need a better solution
|
|
|
|
if strcmp(interp_alg, 'para')
|
|
AmdB_(f,1:L) = interp_para(rate_K_sample_freqs_kHz, rate_K_surface(f,:), Fs/(2*1000), rate_L_sample_freqs_kHz);
|
|
end
|
|
if strcmp(interp_alg, 'lanc')
|
|
AmdB_(f,1:L) = interp_lanczos(rate_K_sample_freqs_kHz, rate_K_surface(f,:), Fs/(2*1000), rate_L_sample_freqs_kHz);
|
|
end
|
|
if strcmp(interp_alg, 'lancmel')
|
|
rate_K_sample_freqs_mel = ftomel(rate_K_sample_freqs_kHz*1000);
|
|
rate_L_sample_freqs_mel = ftomel(rate_L_sample_freqs_kHz*1000);
|
|
AmdB_(f,1:L) = interp_lanczos(rate_K_sample_freqs_mel, rate_K_surface(f,:), Fs/(2*1000), rate_L_sample_freqs_mel);
|
|
end
|
|
|
|
#{
|
|
if pad_end
|
|
AmdB_(f,1:L) = interp1([0 rate_K_sample_freqs_kHz Fs/2000],
|
|
[rate_K_surface(f,1) rate_K_surface(f,:) rate_K_surface(f,K)],
|
|
rate_L_sample_freqs_kHz,
|
|
"spline");
|
|
else
|
|
AmdB_(f,1:L) = interp1([0 rate_K_sample_freqs_kHz],
|
|
[rate_K_surface(f,1) rate_K_surface(f,:)],
|
|
rate_L_sample_freqs_kHz,
|
|
"spline");
|
|
end
|
|
#}
|
|
|
|
%AmdB_(f,1:L) = interp_para(rate_K_sample_freqs_kHz, rate_K_surface(f,:), Fs/(2*1000), rate_L_sample_freqs_kHz);
|
|
%printf("f: %d %f %f %f\n", f, rate_K_sample_freqs_kHz(1), rate_L_sample_freqs_kHz(1), AmdB_(1));
|
|
model_(f,1) = Wo; model_(f,2) = L; model_(f,3:(L+2)) = 10 .^ (AmdB_(f, 1:L)/20);
|
|
end
|
|
endfunction
|
|
|
|
|
|
% PostFilter, has a big impact on speech quality after VQ. When used
|
|
% on a mean removed rate K vector, it raises formants, and supresses
|
|
% anti-formants. As it manipulates amplitudes, we normalise energy to
|
|
% prevent clipping or large level variations. pf_gain of 1.2 to 1.5
|
|
% (dB) seems to work OK. Good area for further investigations and
|
|
% improvements in speech quality.
|
|
|
|
function vec = post_filter(vec, sample_freq_kHz, pf_gain = 1.5, voicing)
|
|
% vec is rate K vector describing spectrum of current frame
|
|
% lets pre-emp before applying PF. 20dB/dec over 300Hz
|
|
|
|
pre = 20*log10(sample_freq_kHz/0.3);
|
|
vec += pre;
|
|
|
|
levels_before_linear = 10 .^ (vec/20);
|
|
e_before = sum(levels_before_linear .^2);
|
|
|
|
vec *= pf_gain;
|
|
|
|
levels_after_linear = 10 .^ (vec/20);
|
|
e_after = sum(levels_after_linear .^2);
|
|
gain = e_after/e_before;
|
|
gaindB = 10*log10(gain);
|
|
vec -= gaindB;
|
|
|
|
vec -= pre;
|
|
endfunction
|
|
|
|
|
|
% construct energy quantiser table, and save to text file to include in C
|
|
|
|
function energy_q = create_energy_q
|
|
energy_q = 10 + 40/16*(0:15);
|
|
endfunction
|
|
|
|
function save_energy_q(fn)
|
|
energy_q = create_energy_q;
|
|
f = fopen(fn, "wt");
|
|
fprintf(f, "1 %d\n", length(energy_q));
|
|
for n=1:length(energy_q)
|
|
fprintf(f, "%f\n", energy_q(n));
|
|
end
|
|
fclose(f);
|
|
endfunction
|
|
|
|
|
|
% save's VQ in format that can be compiled by Codec 2 build system
|
|
|
|
function save_vq(vqset, filenameprefix)
|
|
[Nvec order stages] = size(vqset);
|
|
for s=1:stages
|
|
fn = sprintf("%s_%d.txt", filenameprefix, s);
|
|
f = fopen(fn, "wt");
|
|
fprintf(f, "%d %d\n", order, Nvec);
|
|
for n=1:Nvec
|
|
for k=1:order
|
|
fprintf(f, "% 8.4f ", vqset(n,k,s));
|
|
end
|
|
fprintf(f, "\n");
|
|
end
|
|
fclose(f);
|
|
end
|
|
endfunction
|
|
|
|
|
|
% Decoder side interpolation of Wo and voicing, to go from 25 Hz
|
|
% sample rate used over channel to 100Hz internal sample rate of Codec
|
|
% 2.
|
|
|
|
function [Wo_ voicing_] = interp_Wo_v(Wo1, Wo2, voicing1, voicing2)
|
|
M = 4;
|
|
max_amp = 80;
|
|
|
|
Wo_ = zeros(1,M);
|
|
voicing_ = zeros(1,M);
|
|
if !voicing1 && !voicing2
|
|
Wo_(1:M) = 2*pi/100;
|
|
end
|
|
|
|
if voicing1 && !voicing2
|
|
Wo_(1:M/2) = Wo1;
|
|
Wo_(M/2+1:M) = 2*pi/100;
|
|
voicing_(1:M/2) = 1;
|
|
end
|
|
|
|
if !voicing1 && voicing2
|
|
Wo_(1:M/2) = 2*pi/100;
|
|
Wo_(M/2+1:M) = Wo2;
|
|
voicing_(M/2+1:M) = 1;
|
|
end
|
|
|
|
if voicing1 && voicing2
|
|
Wo_samples = [Wo1 Wo2];
|
|
Wo_(1:M) = interp_linear([1 M+1], Wo_samples, 1:M);
|
|
voicing_(1:M) = 1;
|
|
end
|
|
|
|
#{
|
|
printf("f: %d f+M/2: %d Wo: %f %f (%f %%) v: %d %d \n", f, f+M/2, model(f,1), model(f+M/2,1), 100*abs(model(f,1) - model(f+M/2,1))/model(f,1), voicing(f), voicing(f+M/2));
|
|
for i=f:f+M/2-1
|
|
printf(" f: %d v: %d v_: %d Wo: %f Wo_: %f\n", i, voicing(i), voicing_(i), model(i,1), model_(i,1));
|
|
end
|
|
#}
|
|
endfunction
|
|
|
|
|
|
function [diff_weighted weights error g min_ind] = search_vq_weighted(target, vq, weight_gain)
|
|
[vq_rows vq_cols] = size(vq);
|
|
|
|
weight_gain = 0.1; % I like this vairable name as it is funny
|
|
|
|
% find mse for each vector
|
|
|
|
error = g = zeros(1, vq_rows);
|
|
diff = weights = diff_weighted = zeros(vq_rows, vq_cols);
|
|
|
|
weights = max(0.1, weight_gain .* (target + 20));
|
|
|
|
for i=1:vq_rows
|
|
|
|
% work out gain for best match
|
|
|
|
g(i) = sum((target - vq(i,:)).*weights)/vq_cols;
|
|
|
|
% Find weighted difference. This allocated more importance
|
|
% (error) to samples with higher energy, and stops really low
|
|
% level harmonics from having any impact. Note addition in dB
|
|
% is multiplication in linear
|
|
|
|
diff(i,:) = target - vq(i,:) - g(i);
|
|
|
|
diff_weighted(i,:) = diff(i,:) .* weights;
|
|
|
|
% abs in dB is MSE in linear
|
|
|
|
error(i) = mean(abs(diff_weighted(i,:)));
|
|
end
|
|
|
|
[mn min_ind] = min(error);
|
|
|
|
endfunction
|
|
|
|
|
|
% --------------------------------------------------------------------------------
|
|
% Experimental functions used for masking, piecewise models, not part of newamp1
|
|
% --------------------------------------------------------------------------------
|
|
|
|
|
|
function [maskdB_ maskdB_cyclic Dabs dk_ D1_ ind] = decimate_in_freq(maskdB, cyclic=1, k=7, vq)
|
|
|
|
% Lets try to come up with a smoothed, cyclic model. Replace
|
|
% points from 3500 Hz to 4000Hz with a sequence that joins without
|
|
% a step to points at the 0Hz end of the spectrum. This will make
|
|
% it more cyclical and make the DFT happier, less high freq
|
|
% energy. Yes, happier is an extremely technical term.
|
|
|
|
L = length(maskdB);
|
|
anchor = floor(7*L/8);
|
|
xpts = [ anchor-1 anchor L+1 L+2];
|
|
ypts = [ maskdB(anchor-1) maskdB(anchor) maskdB(1) maskdB(2)];
|
|
mask_pp = splinefit(xpts, ypts, 1);
|
|
maskdB_cyclic = [maskdB(1:anchor) ppval(mask_pp, anchor+1:L)];
|
|
|
|
% Now DFT, truncating DFT coeffs to undersample
|
|
|
|
if cyclic
|
|
D = fft(maskdB_cyclic)/L;
|
|
else
|
|
D = fft(maskdB)/L;
|
|
end
|
|
Dabs = abs(D); % this returned for plotting
|
|
|
|
% truncate D to rate k, convert to 2k length real vector for quantisation and transmission
|
|
|
|
Dk = [0 D(2:k-1) real(D(k)) D(L-k+1:L)];
|
|
dk = real(ifft(Dk));
|
|
D1 = D(1);
|
|
|
|
% quantisation
|
|
|
|
if nargin == 4
|
|
[res tmp vq_ind] = mbest(vq, dk, 4);
|
|
D1_tab = 0:(60/15):60;
|
|
assert(length(D1_tab) == 16);
|
|
[tmp D1_ind] = quantise(D1_tab, D1);
|
|
ind = [vq_ind D1_ind];
|
|
[dk_ D1_] = index_to_params(ind, vq);
|
|
%std(dk_ - dk)
|
|
else
|
|
dk_ = dk;
|
|
D1_ = D1;
|
|
end
|
|
|
|
maskdB_ = params_to_mask(L, k, dk_, D1_);
|
|
endfunction
|
|
|
|
|
|
|
|
function [dk_ D1_] = index_to_params(ind, vq)
|
|
[Nvec order stages] = size(vq);
|
|
dk_ = zeros(1,order);
|
|
for s=1:stages
|
|
dk_ = dk_ + vq(ind(s),:,s);
|
|
end
|
|
D1_tab = 0:(60/15):60;
|
|
D1_ = D1_tab(ind(stages+1));
|
|
endfunction
|
|
|
|
|
|
% decoder side
|
|
|
|
function maskdB_ = params_to_mask(L, k, dk_, D1_)
|
|
|
|
anchor = floor(7*L/8);
|
|
|
|
% convert quantised dk back to rate L magnitude spectrum
|
|
|
|
Dk_ = fft(dk_);
|
|
D_ = zeros(1,L);
|
|
D_(1) = D1_; % energy seperately quantised
|
|
D_(2:k-1) = Dk_(2:k-1);
|
|
D_(L-k+1:L) = Dk_(k+1:2*k);
|
|
d_ = L*ifft(D_); % back to spectrum at rate L
|
|
maskdB_ = real(d_);
|
|
|
|
% Finally fix up last 500Hz, taper down 10dB at 4000Hz
|
|
|
|
xpts = [ anchor-1 anchor L];
|
|
ypts = [ maskdB_(anchor-1) maskdB_(anchor) (maskdB_(anchor)-10)];
|
|
mask_pp = splinefit(xpts, ypts, 1);
|
|
maskdB_ = [maskdB_(1:anchor) ppval(mask_pp, anchor+1:L)];
|
|
endfunction
|
|
|
|
|
|
|
|
% determine cumulative mask, using amplitude of each harmonic. Mask is
|
|
% sampled across L points in the linear domain
|
|
|
|
function maskdB = determine_mask(masker_amps_dB, masker_freqs_kHz, mask_sample_freqs_kHz, bark_model=1)
|
|
|
|
% calculate and plot masking curve
|
|
|
|
maskdB = -20*ones(1,length(mask_sample_freqs_kHz));
|
|
for m=1:length(masker_freqs_kHz)
|
|
maskdB = max(maskdB, schroeder(masker_freqs_kHz(m), mask_sample_freqs_kHz, bark_model) + masker_amps_dB(m));
|
|
%maskdB = max(maskdB, parabolic_resonator(masker_freqs_kHz(m), mask_sample_freqs_kHz) + masker_amps_dB(m));
|
|
end
|
|
end
|
|
|
|
|
|
% Sample mask as model for Am
|
|
|
|
function [maskdB Am_freqs_kHz] = mask_model(AmdB, Wo, L, bark_model=1)
|
|
|
|
Am_freqs_kHz = (1:L)*Wo*4/pi;
|
|
maskdB = determine_mask(AmdB, Am_freqs_kHz, Am_freqs_kHz, bark_model);
|
|
endfunction
|
|
|
|
|
|
%
|
|
% Masking functions from http://www.perceptualentropy.com/coder.html#C
|
|
% Thanks Jon Boley!
|
|
%
|
|
|
|
% Calculate the Schroeder masking spectrum for a given frequency and SPL
|
|
|
|
function maskdB = schroeder(freq_tone_kHz, mask_sample_freqs_kHz, bark_model=1)
|
|
f_kHz = mask_sample_freqs_kHz;
|
|
f_Hz = f_kHz*1000;
|
|
|
|
% Schroeder Spreading Function
|
|
|
|
if bark_model == 0
|
|
dz = bark(freq_tone_kHz*1000)-bark(f_Hz);
|
|
end
|
|
|
|
if bark_model == 1
|
|
|
|
% Modification by DR: Piecewise linear model that makes bands
|
|
% beneath 1.5kHz wider to match the width of F1 and
|
|
% "fill in" the spectra better for UV sounds.
|
|
|
|
%x1 = 0.5; x2 = 2;
|
|
%y1 = 0.5; y2 = 1;
|
|
x1 = 0.5; x2 = 3;
|
|
y1 = 1; y2 = 3;
|
|
|
|
grad = (y2 - y1)/(x2 - x1);
|
|
y_int = y1 - grad*x1;
|
|
|
|
if freq_tone_kHz <= x1
|
|
y = y1;
|
|
end
|
|
if (freq_tone_kHz > x1) && (freq_tone_kHz < x2)
|
|
y = grad*freq_tone_kHz + y_int;
|
|
end
|
|
if freq_tone_kHz >= x2
|
|
y = y2;
|
|
end
|
|
dz = y*(bark(freq_tone_kHz*1000) - bark(f_Hz));
|
|
end
|
|
|
|
if bark_model == 2
|
|
|
|
% constant bandwidth model, useful for bg noise and UV
|
|
|
|
%dz = bark(freq_tone_kHz*1000) - bark(f_Hz);
|
|
dz = 0.2*bark(freq_tone_kHz*1000-f_Hz);
|
|
end
|
|
|
|
maskdB = 15.81 + 7.5*(dz+0.474) - 17.5*sqrt(1 + (dz+0.474).^2);
|
|
endfunction
|
|
|
|
|
|
% Converts frequency to bark scale
|
|
% Frequency should be specified in Hertz
|
|
|
|
function b=bark(f)
|
|
b = 13*atan(0.76*f/1000) + 3.5*atan((f/7500).^2);
|
|
endfunction
|
|
|
|
|
|
% Alternative mask function that has a gentler slope than schroeder.
|
|
% Idea is to get sharp formant definition, but also fill in gaps so we
|
|
% dont get chunks of spectrum coming and going
|
|
|
|
function [maskdB pp] = resonator(freq_tone_kHz, mask_sample_freqs_kHz)
|
|
% note all frequencies in kHz
|
|
|
|
f1 = 0.1; f2 = 3;
|
|
bw1 = 0.1; bw2 = 0.1;
|
|
m = (bw2-bw1)/(log10(f2)-log10(f1));
|
|
c = bw1 - m*log10(f1);
|
|
|
|
Fs = 8;
|
|
slope = -12; % filter falls off by this slope/octave
|
|
|
|
maskdB = zeros(1, length(mask_sample_freqs_kHz));
|
|
|
|
% frequency dependant bandwidth
|
|
|
|
bw = m*log10(freq_tone_kHz) + c;
|
|
printf("freq_tone_kHz: %f bw: %f\n", freq_tone_kHz, bw);
|
|
|
|
% Design spline to set shape based on current bandwidth
|
|
|
|
x = [-Fs/2 -bw/2 0 +bw/2 +Fs/2];
|
|
delta = slope*log2(Fs/bw); % gain is delta down from -3dB to Fs/2
|
|
y = [-3 + delta, -3, 0, -3, -3 + delta];
|
|
pp = splinefit(x, y, 4);
|
|
maskdB = ppval(pp, mask_sample_freqs_kHz - freq_tone_kHz);
|
|
endfunction
|
|
|
|
|
|
function maskdB = resonator_fast(freq_tone_kHz, mask_sample_freqs_kHz)
|
|
|
|
% note all frequencies on kHz
|
|
|
|
#{
|
|
max_ind = length(pp_bw);
|
|
ind = round(freq_tone_kHz/0.1);
|
|
ind = min(ind, max_ind);
|
|
ind = max(ind, 1);
|
|
#}
|
|
%printf("freq_tone_kHz: %f ind: %d\n", freq_tone_kHz, ind);
|
|
[maskdB_res1 pp] = resonator(0.5, mask_sample_freqs_kHz);
|
|
|
|
maskdB = ppval(pp, mask_sample_freqs_kHz - freq_tone_kHz);
|
|
%maskdB = ppval(pp_bw(ind), mask_sample_freqs_kHz - freq_tone_kHz);
|
|
endfunction
|
|
|
|
|
|
% Alternative mask function that uses parabolas for fast computation
|
|
|
|
function maskdB = parabolic_resonator(freq_tone_kHz, mask_sample_freqs_kHz)
|
|
|
|
% note all frequencies in kHz
|
|
|
|
% bandwidth as a function of log(f)
|
|
|
|
f1 = 0.5; f2 = 3;
|
|
bw1 = 0.1; bw2 = 0.3;
|
|
m = (bw2-bw1)/(log10(f2)-log10(f1));
|
|
c = bw1 - m*log10(f1);
|
|
|
|
Fs = 8;
|
|
slope = -18;
|
|
|
|
% frequency dependant bandwidth
|
|
|
|
if freq_tone_kHz < f1
|
|
bw = bw1;
|
|
else
|
|
bw = m*log10(freq_tone_kHz) + c;
|
|
end
|
|
%printf("freq_tone_kHz: %f bw: %f\n", freq_tone_kHz, bw);
|
|
|
|
% Design parabola to fit bandwidth
|
|
|
|
a = -3/((bw/2)^2);
|
|
%printf("freq_tone_kHz: %f bw: %f a: %f\n", freq_tone_kHz, bw, a);
|
|
|
|
% Design straight line to fit slope
|
|
|
|
delta = slope*log2(Fs/bw); % gain is delta down from -3dB to Fs/2
|
|
m1 = 2*delta/Fs;
|
|
|
|
maskdB_par = a*((mask_sample_freqs_kHz - freq_tone_kHz).^2);
|
|
maskdB_line = m1*abs(mask_sample_freqs_kHz - freq_tone_kHz) - 10;
|
|
%indx = find(mask_sample_freqs_kHz < freq_tone_kHz);
|
|
%maskdB_line(indx) = -50;
|
|
|
|
maskdB = max(maskdB_par, maskdB_line);
|
|
endfunction
|
|
|
|
|
|
% sampling the mask in one frame using an arbitrary set of samplng frequencies
|
|
|
|
function maskdB = resample_mask(model, f, mask_sample_freqs_kHz)
|
|
max_amp = 80;
|
|
|
|
Wo = model(f,1);
|
|
L = min([model(f,2) max_amp-1]);
|
|
Am = model(f,3:(L+2));
|
|
AmdB = 20*log10(Am);
|
|
masker_freqs_kHz = (1:L)*Wo*4/pi;
|
|
maskdB = determine_mask(AmdB, masker_freqs_kHz, mask_sample_freqs_kHz);
|
|
endfunction
|
|
|
|
|
|
% decimate frame rate of mask, use linear interpolation in the log domain
|
|
|
|
function maskdB_ = decimate_frame_rate(model, decimate, f, frames)
|
|
max_amp = 80;
|
|
|
|
Wo = model(f,1);
|
|
L = min([model(f,2) max_amp]);
|
|
|
|
% determine frames that bracket the one we need to interp
|
|
|
|
left_f = decimate*floor((f-1)/decimate)+1;
|
|
right_f = left_f + decimate;
|
|
if right_f > frames
|
|
right_f = left_f;
|
|
end
|
|
|
|
% determine fraction of each frame to use
|
|
|
|
left_fraction = 1 - mod((f-1),decimate)/decimate;
|
|
right_fraction = 1 - left_fraction;
|
|
|
|
printf("f: %d left_f: %d right_f: %d left_fraction: %3.2f right_fraction: %3.2f \n", f, left_f, right_f, left_fraction, right_fraction)
|
|
|
|
% fit splines to left and right masks
|
|
|
|
left_Wo = model(left_f,1);
|
|
left_L = min([model(left_f,2) max_amp]);
|
|
left_AmdB = 20*log10(model(left_f,3:(left_L+2)));
|
|
left_sample_freqs_kHz = (1:left_L)*left_Wo*4/pi;
|
|
|
|
right_Wo = model(right_f,1);
|
|
right_L = min([model(right_f,2) max_amp]);
|
|
right_AmdB = 20*log10(model(right_f,3:(right_L+2)));
|
|
right_sample_freqs_kHz = (1:right_L)*right_Wo*4/pi;
|
|
|
|
% determine mask for left and right frames, sampling at Wo for this frame
|
|
|
|
sample_freqs_kHz = (1:L)*Wo*4/pi;
|
|
maskdB_left = interp1(left_sample_freqs_kHz, left_AmdB, sample_freqs_kHz, "extrap");
|
|
maskdB_right = interp1(right_sample_freqs_kHz, right_AmdB, sample_freqs_kHz, "extrap");
|
|
|
|
maskdB_ = left_fraction*maskdB_left + right_fraction*maskdB_right;
|
|
endfunction
|
|
|
|
|
|
% plot some masking curves, used for working on masking filter changes
|
|
|
|
function plot_masking(bark_model=0);
|
|
Fs = 8000;
|
|
|
|
figure(1)
|
|
mask_sample_freqs_kHz = 0.1:0.025:(Fs/1000)/2;
|
|
%maskdB_s0 = schroeder(0.5, mask_sample_freqs_kHz, 0);
|
|
%plot(mask_sample_freqs_kHz, maskdB_s0,';schroeder 0;');
|
|
maskdB_s1 = schroeder(0.5, mask_sample_freqs_kHz, bark_model);
|
|
plot(mask_sample_freqs_kHz, maskdB_s1,'g;schroeder 1;');
|
|
#{
|
|
maskdB_res = parabolic_resonator(0.5, mask_sample_freqs_kHz);
|
|
plot(mask_sample_freqs_kHz, maskdB_res,'r;resonator;');
|
|
#}
|
|
hold on;
|
|
|
|
for f=0.5:0.5:3
|
|
%maskdB_s0 = schroeder(f, mask_sample_freqs_kHz, 0);
|
|
%plot(mask_sample_freqs_kHz, maskdB_s0);
|
|
maskdB_s1 = schroeder(f, mask_sample_freqs_kHz, bark_model);
|
|
plot(mask_sample_freqs_kHz, maskdB_s1,'g');
|
|
#{
|
|
maskdB_res = parabolic_resonator(f, mask_sample_freqs_kHz);
|
|
plot(mask_sample_freqs_kHz, maskdB_res,'r;resonator;');
|
|
#}
|
|
end
|
|
hold off;
|
|
|
|
axis([0.1 4 -30 0])
|
|
grid
|
|
|
|
#{
|
|
%pp_bw = gen_pp_bw;
|
|
figure(2)
|
|
clf;
|
|
maskdB_res = resonator(0.5, mask_sample_freqs_kHz);
|
|
plot(mask_sample_freqs_kHz, maskdB_res);
|
|
hold on;
|
|
maskdB_res_fast = resonator_fast(0.5, mask_sample_freqs_kHz);
|
|
plot(mask_sample_freqs_kHz, maskdB_res_fast, "g");
|
|
maskdB_par = parabolic_resonator(0.5, mask_sample_freqs_kHz);
|
|
plot(mask_sample_freqs_kHz, maskdB_par, "r");
|
|
hold off;
|
|
axis([0 4 -80 0]);
|
|
grid
|
|
#}
|
|
endfunction
|
|
|
|
|
|
% produce a scatter diagram of amplitudes
|
|
|
|
function amp_scatter(samname)
|
|
|
|
model_name = strcat(samname,"_model.txt");
|
|
model = load(model_name);
|
|
[frames nc] = size(model);
|
|
max_amp = 80;
|
|
|
|
Am_out_name = sprintf("%s_am.out", samname);
|
|
freqs = [];
|
|
ampsdB = [];
|
|
|
|
for f=1:frames
|
|
|
|
L = min([model(f,2) max_amp-1]);
|
|
Wo = model(f,1);
|
|
Am = model(f,3:(L+2));
|
|
AmdB = 20*log10(Am);
|
|
|
|
maskdB = mask_model(AmdB, Wo, L);
|
|
mask_sample_freqs_kHz = (1:L)*Wo*4/pi;
|
|
[newmaskdB local_maxima] = make_newmask(maskdB, AmdB, Wo, L, mask_sample_freqs_kHz);
|
|
|
|
[nlm tmp] = size(local_maxima);
|
|
freqs = [freqs (local_maxima(1:min(4,nlm),2)*Wo*4000/pi)'];
|
|
an_ampsdB = local_maxima(1:min(4,nlm),1)';
|
|
ampsdB = [ampsdB an_ampsdB-mean(an_ampsdB)];
|
|
end
|
|
|
|
figure(1)
|
|
plot(freqs, ampsdB,'+');
|
|
figure(2)
|
|
subplot(211)
|
|
hist(freqs,20)
|
|
subplot(212)
|
|
hist(ampsdB,20)
|
|
endfunction
|
|
|
|
|
|
|
|
% AbyS returns f & a, this function plots values so we can consider quantisation
|
|
|
|
function plot_f_a_stats(f,a)
|
|
|
|
% freq pdfs
|
|
|
|
[fsrt fsrt_ind] = sort(f,2);
|
|
fsrt /= 1000;
|
|
figure(1)
|
|
for i=1:4
|
|
subplot(2,2,i)
|
|
hist(fsrt(:,i),50)
|
|
printf("%d min: %d max: %d\n", i, min(fsrt(:,i)), max(fsrt(:,i)))
|
|
an_axis = axis;
|
|
axis([0 4 an_axis(3) an_axis(4)])
|
|
end
|
|
|
|
% freq diff pdfs
|
|
|
|
figure(2)
|
|
for i=1:4
|
|
subplot(2,2,i)
|
|
if i == 1
|
|
hist(fsrt(:,i),50)
|
|
else
|
|
hist(fsrt(:,i) - fsrt(:,i-1),50)
|
|
end
|
|
an_axis = axis;
|
|
axis([0 4 an_axis(3) an_axis(4)])
|
|
end
|
|
|
|
% amplitude PDFs
|
|
|
|
l = length(a);
|
|
for i=1:l
|
|
asrt(i,:) = a(i, fsrt_ind(i,:));
|
|
end
|
|
|
|
figure(3)
|
|
for i=1:4
|
|
subplot(2,2,i)
|
|
hist(asrt(:,i) - mean(asrt(:,:),2))
|
|
an_axis = axis;
|
|
axis([-40 40 an_axis(3) an_axis(4)])
|
|
end
|
|
|
|
% find straight line fit
|
|
|
|
for i=1:l
|
|
[gradient intercept] = linreg(1000*fsrt(i,:), asrt(i,:), 4);
|
|
alinreg(i,:) = gradient*1000*fsrt(i,:) + intercept;
|
|
alinregres(i,:) = asrt(i,:) - alinreg(i,:);
|
|
m(i) = gradient; c(i) = intercept;
|
|
end
|
|
|
|
figure(4)
|
|
for i=1:4
|
|
subplot(2,2,i)
|
|
hist(alinregres(:,i))
|
|
an_axis = axis;
|
|
axis([-40 40 an_axis(3) an_axis(4)])
|
|
end
|
|
|
|
figure(5)
|
|
subplot(211)
|
|
m = m(find(m>-0.05));
|
|
m = m(find(m<0.03));
|
|
hist(m,50)
|
|
title('gradient');
|
|
subplot(212)
|
|
c = c(find(c>0));
|
|
hist(c,50)
|
|
title('y-int');
|
|
|
|
endfunction
|
|
|
|
function D1_log = decode_from_bit_stream(samname, ber = 0, bit_error_mask = ones(28,1))
|
|
max_amp = 80;
|
|
bits_per_param = [6 1 8 8 4 1];
|
|
assert(sum(bits_per_param) == 28);
|
|
load vq;
|
|
k = 10;
|
|
dec_in_time = 1;
|
|
train = 0;
|
|
decimate = 4;
|
|
synth_phase = 1;
|
|
|
|
Am_out_name = sprintf("%s_am.out", samname);
|
|
Aw_out_name = sprintf("%s_aw.out", samname);
|
|
bit_stream_name = strcat(samname,".bit");
|
|
faw = fopen(Aw_out_name,"wb");
|
|
fam = fopen(Am_out_name,"wb");
|
|
faw = fopen(Aw_out_name,"wb");
|
|
|
|
Wo_out_name = sprintf("%s_Wo.out", samname);
|
|
fWo = fopen(Wo_out_name,"wb");
|
|
|
|
% read in bit stream and convert to ind_log[]
|
|
|
|
ind_log = [];
|
|
fbit = fopen(bit_stream_name, "rb");
|
|
bits_per_frame = sum(bits_per_param);
|
|
nind = length(bits_per_param);
|
|
nerr = 0; nbits = 0;
|
|
[frame nread] = fread(fbit, sum(bits_per_param), "uchar");
|
|
while (nread == bits_per_frame)
|
|
|
|
% optionally introduce bit errors
|
|
|
|
error_bits = rand(sum(bits_per_param), 1) < ber;
|
|
error_bits_masked = bitand(error_bits, bit_error_mask);
|
|
frame = bitxor(frame, error_bits_masked);
|
|
nerr += sum(error_bits_masked);
|
|
nbits += sum(bits_per_param);
|
|
|
|
% read a frame, convert to indexes
|
|
|
|
nbit = 1;
|
|
ind = [];
|
|
for i=1:nind
|
|
field = frame(nbit:nbit+bits_per_param(i)-1);
|
|
nbit += bits_per_param(i);
|
|
ind = [ind bits_to_index(field, bits_per_param(i))];
|
|
end
|
|
ind_log = [ind_log; ind];
|
|
[frame nread] = fread(fbit, sum(bits_per_param), "uchar");
|
|
endwhile
|
|
fclose(fbit);
|
|
printf("nerr: %d nbits: %d %f\n", nerr, nbits, nerr/nbits);
|
|
|
|
% convert ind_log to modem params
|
|
|
|
frames = 4*length(ind_log);
|
|
model_ = zeros(frames, max_amp+2);
|
|
v = zeros(frames,1);
|
|
D1_log = [];
|
|
|
|
fdec = 1;
|
|
for f=1:4:frames
|
|
ind_Wo = ind_log(fdec,1);
|
|
|
|
Wo = decode_log_Wo(ind_Wo, 6);
|
|
L = floor(pi/Wo);
|
|
L = min([L max_amp-1]);
|
|
model_(f,1) = Wo;
|
|
model_(f,2) = L;
|
|
|
|
v1 = ind_log(fdec,2);
|
|
if (fdec+1) < length(ind_log)
|
|
v5 = ind_log(fdec+1,2);
|
|
else
|
|
v5 = 0;
|
|
end
|
|
v(f:f+3) = est_voicing_bits(v1, v5);
|
|
|
|
ind_vq = ind_log(fdec,3:5) + 1;
|
|
[dk_ D1_] = index_to_params(ind_vq, vq);
|
|
D1_log = [D1_log; D1_];
|
|
maskdB_ = params_to_mask(L, k, dk_, D1_);
|
|
Am_ = zeros(1,max_amp);
|
|
Am_ = 10 .^ (maskdB_(1:L)/20);
|
|
model_(f,3:(L+2)) = Am_;
|
|
|
|
fdec += 1;
|
|
end
|
|
|
|
% decoder loop -----------------------------------------------------
|
|
|
|
if train
|
|
% short circuit decoder
|
|
frames = 0;
|
|
end
|
|
|
|
% run post filter ahead of time so dec in time has post filtered frames to work with
|
|
|
|
for f=1:frames
|
|
model_(f,:) = post_filter(model_(f,:));
|
|
end
|
|
|
|
for f=1:frames
|
|
%printf("frame: %d\n", f);
|
|
L = min([model_(f,2) max_amp-1]);
|
|
Wo = model_(f,1);
|
|
Am_ = model_(f,3:(L+2));
|
|
|
|
maskdB_ = 20*log10(Am_);
|
|
|
|
if dec_in_time
|
|
% decimate mask samples in time
|
|
|
|
[maskdB_ Wo L] = decimate_frame_rate2(model_, decimate, f, frames);
|
|
model_(f,1) = Wo;
|
|
model_(f,2) = L;
|
|
end
|
|
|
|
Am_ = zeros(1,max_amp);
|
|
Am_(2:L) = 10 .^ (maskdB_(1:L-1)/20); % C array doesnt use A[0]
|
|
fwrite(fam, Am_, "float32");
|
|
fwrite(fWo, Wo, "float32");
|
|
|
|
if synth_phase
|
|
|
|
% synthesis phase spectra from magnitiude spectra using minimum phase techniques
|
|
|
|
fft_enc = 512;
|
|
model_(f,3:(L+2)) = 10 .^ (maskdB_(1:L)/20);
|
|
phase = determine_phase(model_, f);
|
|
assert(length(phase) == fft_enc);
|
|
Aw = zeros(1, fft_enc*2);
|
|
Aw(1:2:fft_enc*2) = cos(phase);
|
|
Aw(2:2:fft_enc*2) = -sin(phase);
|
|
fwrite(faw, Aw, "float32");
|
|
end
|
|
end
|
|
|
|
fclose(fam);
|
|
fclose(fWo);
|
|
if synth_phase
|
|
fclose(faw);
|
|
end
|
|
|
|
% save voicing file
|
|
|
|
v_out_name = sprintf("%s_v.txt", samname);
|
|
fv = fopen(v_out_name,"wt");
|
|
for f=1:length(v)
|
|
fprintf(fv,"%d\n", v(f));
|
|
end
|
|
fclose(fv);
|
|
|
|
endfunction
|
|
|
|
|
|
|
|
% decimate frame rate of mask, use linear interpolation in the log domain
|
|
|
|
function [maskdB_ Wo L] = decimate_frame_rate2(model, decimate, f, frames)
|
|
max_amp = 80;
|
|
|
|
% determine frames that bracket the one we need to interp
|
|
|
|
left_f = decimate*floor((f-1)/decimate)+1;
|
|
right_f = left_f + decimate;
|
|
if right_f > frames
|
|
right_f = left_f;
|
|
end
|
|
|
|
% determine fraction of each frame to use
|
|
|
|
left_fraction = 1 - mod((f-1),decimate)/decimate;
|
|
right_fraction = 1 - left_fraction;
|
|
|
|
% printf("f: %d left_f: %d right_f: %d left_fraction: %3.2f right_fraction: %3.2f \n", f, left_f, right_f, left_fraction, right_fraction)
|
|
|
|
% fit splines to left and right masks
|
|
|
|
left_Wo = model(left_f,1);
|
|
left_L = min([model(left_f,2) max_amp]);
|
|
left_AmdB = 20*log10(model(left_f,3:(left_L+2)));
|
|
left_mask_sample_freqs_kHz = (1:left_L)*left_Wo*4/pi;
|
|
|
|
right_Wo = model(right_f,1);
|
|
right_L = min([model(right_f,2) max_amp]);
|
|
right_AmdB = 20*log10(model(right_f,3:(right_L+2)));
|
|
right_mask_sample_freqs_kHz = (1:right_L)*right_Wo*4/pi;
|
|
|
|
% printf(" right_Wo: %f left_Wo: %f right_L: %d left_L %d\n",right_Wo,left_Wo,right_L,left_L);
|
|
printf("%f %f\n", left_AmdB(left_L), right_AmdB(right_L));
|
|
|
|
maskdB_left_pp = splinefit(left_mask_sample_freqs_kHz, left_AmdB, left_L);
|
|
maskdB_right_pp = splinefit(right_mask_sample_freqs_kHz, right_AmdB, right_L);
|
|
|
|
% determine mask for left and right frames, sampling at Wo for this frame
|
|
|
|
Wo = left_fraction*left_Wo + right_fraction*right_Wo;
|
|
L = floor(pi/Wo);
|
|
%Wo = model(f,1); L = model(f,2);
|
|
|
|
mask_sample_freqs_kHz = (1:L)*Wo*4/pi;
|
|
maskdB_left = ppval(maskdB_left_pp, mask_sample_freqs_kHz);
|
|
maskdB_right = ppval(maskdB_right_pp, mask_sample_freqs_kHz);
|
|
|
|
maskdB_ = left_fraction*maskdB_left + right_fraction*maskdB_right;
|
|
endfunction
|
|
|
|
#{
|
|
function amodel = post_filter(amodel)
|
|
max_amp = 80;
|
|
|
|
% post filter
|
|
|
|
L = min([amodel(2) max_amp-1]);
|
|
Wo = amodel(1);
|
|
Am_ = amodel(3:(L+2));
|
|
AmdB_ = 20*log10(Am_);
|
|
AmdB_pf = AmdB_*1.5;
|
|
AmdB_pf += max(AmdB_) - max(AmdB_pf);
|
|
amodel(3:(L+2)) = 10 .^ (AmdB_pf(1:L)/20);
|
|
endfunction
|
|
#}
|
|
|
|
|
|
% Given a matrix with indexes on each row, convert to a bit stream and
|
|
% write to file. We only write every 4th frame due to DIT
|
|
|
|
function write_bit_stream_file(fn, ind_log, bits_per_param)
|
|
fbit = fopen(fn,"wb");
|
|
decimate = 4;
|
|
|
|
% take a row of quantiser indexes, convert to bits, save to file
|
|
|
|
[frames nind] = size(ind_log);
|
|
for f=1:decimate:frames
|
|
frame_of_bits = [];
|
|
arow = ind_log(f,:);
|
|
for i=1:nind
|
|
%printf("i: %d bits_per_param: %d\n", i, bits_per_param(i));
|
|
some_bits = index_to_bits(arow(i), bits_per_param(i));
|
|
frame_of_bits = [frame_of_bits some_bits];
|
|
end
|
|
fwrite(fbit, frame_of_bits, "uchar");
|
|
end
|
|
fclose(fbit);
|
|
endfunction
|
|
|
|
|
|
|
|
% determine 4 voicing bits based on 2 decimated voicing bits
|
|
|
|
function [v] = est_voicing_bits(v1, v5)
|
|
if v1 == v5
|
|
v(1:4) = v1;
|
|
else
|
|
v(1:2) = v1;
|
|
v(3:4) = v5;
|
|
end
|
|
endfunction
|
|
|
|
|
|
function [AmdB_ residual fvec fvec_ amps] = piecewise_model(AmdB, Wo, vq, vq_m)
|
|
L = length(AmdB);
|
|
l1000 = floor(L/4);
|
|
AmdB_ = ones(1,L);
|
|
mask_sample_freqs_kHz = (1:L)*Wo*4/pi;
|
|
|
|
% fit a resonator to max of first 300 - 1000 Hz
|
|
|
|
fmin = 0.150;
|
|
lmin = floor(L*fmin/4);
|
|
[mx mx_ind] = max(AmdB(lmin+1:l1000));
|
|
amp(1) = mx;
|
|
mx_ind += lmin;
|
|
AmdB_ = parabolic_resonator(mx_ind*Wo*4/pi, mask_sample_freqs_kHz) + mx;
|
|
fr1 = mx_ind*Wo*4/pi;
|
|
|
|
% fit a 2nd resonator, must be above 1000Hz
|
|
|
|
fmin = 1;
|
|
lmin = round(fmin*L/4);
|
|
|
|
[mx mx_ind] = max(AmdB(lmin+1:L));
|
|
amp(2) = mx;
|
|
mx_ind += lmin;
|
|
AmdB_ = max(AmdB_, parabolic_resonator(mx_ind*Wo*4/pi, mask_sample_freqs_kHz) + mx);
|
|
fr2 = mx_ind*Wo*4/pi;
|
|
|
|
% fit a third resonator, must be +/- 300 Hz after 2nd resonator
|
|
|
|
residual = AmdB - AmdB_;
|
|
keep_out = [1:lmin];
|
|
lmax = round(L*3500/4000);
|
|
keep_out = [1:lmin lmax:L];
|
|
residual(keep_out) = -40;
|
|
|
|
fr2 = mx_ind*Wo*4/pi;
|
|
fmin = fr2 - 0.300;
|
|
fmax = fr2 + 0.300;
|
|
lmin = max(1, round(L*fmin/4));
|
|
lmax = min(L, round(L*fmax/4));
|
|
keep_out = [keep_out lmin:lmax];
|
|
|
|
residual = AmdB;
|
|
residual(keep_out) = -40;
|
|
|
|
if 0
|
|
figure(3); clf;
|
|
subplot(211)
|
|
plot(mask_sample_freqs_kHz, residual);
|
|
end
|
|
|
|
[mx mx_ind] = max(residual);
|
|
amp(3) = AmdB(mx_ind);
|
|
AmdB_ = max(AmdB_, parabolic_resonator(mx_ind*Wo*4/pi, mask_sample_freqs_kHz) + amp(3));
|
|
fr3 = mx_ind*Wo*4/pi;
|
|
|
|
% 4th resonator
|
|
|
|
fmin = fr3 - 0.300;
|
|
fmax = fr3 + 0.300;
|
|
|
|
lmin = max(1, round(L*fmin/4));
|
|
lmax = min(L, round(L*fmax/4));
|
|
keep_out = [keep_out lmin:lmax];
|
|
|
|
residual = AmdB - AmdB_;
|
|
residual(keep_out) = -40;
|
|
|
|
[mx mx_ind] = max(residual);
|
|
amp(4) = AmdB(mx_ind);
|
|
AmdB_ = max(AmdB_, parabolic_resonator(mx_ind*Wo*4/pi, mask_sample_freqs_kHz) + amp(4));
|
|
fr4 = mx_ind*Wo*4/pi;
|
|
|
|
if 0
|
|
subplot(212)
|
|
plot(mask_sample_freqs_kHz, residual);
|
|
end
|
|
|
|
printf("\nfr1: %f fr2: %f fr3: %f fr4: %f\n", fr1, fr2, fr3, fr4);
|
|
[fvec fvec_ind] = sort([fr1 fr2 fr3 fr4]);
|
|
amps = amp(fvec_ind(1:4));
|
|
|
|
fvec_ = zeros(1, 4);
|
|
|
|
#{
|
|
% optional VQ of frequencies
|
|
|
|
if nargin == 4
|
|
AmdB_ = ones(1,L);
|
|
[mes fvec_ ind] = mbest(vq, fvec, vq_m);
|
|
for i=1:4
|
|
an_amp = amp(fvec_ind(i));
|
|
AmdB_ = max(AmdB_, parabolic_resonator(fvec_(i), mask_sample_freqs_kHz) + an_amp);
|
|
end
|
|
end
|
|
#}
|
|
|
|
% optional VQ of amplitudes
|
|
|
|
if nargin == 4
|
|
AmdB_ = ones(1,L);
|
|
%amps_(1) = amps(1);
|
|
%[mes tmp ind] = mbest(vq, amps(2:4) - amps_(1), vq_m);
|
|
%amps_(2:4) = amps_(1) + tmp;
|
|
[mes amps_ ind] = mbest(vq, amps, vq_m);
|
|
amps-amps_
|
|
for i=1:4
|
|
AmdB_ = max(AmdB_, parabolic_resonator(fvec(i), mask_sample_freqs_kHz) + amps_(i));
|
|
end
|
|
end
|
|
|
|
%amps = amps(2:4) - amps(1);
|
|
endfunction
|
|
|
|
|
|
% find best place for resonator by closed loop min MSE search
|
|
|
|
function lmin = abys(AmdB_, AmdB, Wo, L, mask_sample_freqs_kHz)
|
|
lstart = round(L/4);
|
|
lmin = lstart;
|
|
emin = 1E6;
|
|
|
|
printf("lstart: %d L: %d\n", lstart, L);
|
|
|
|
figure(3);
|
|
subplot(211)
|
|
plot(mask_sample_freqs_kHz*1000, AmdB,'r+-');
|
|
|
|
e = zeros(1,L);
|
|
for l=lstart:L
|
|
|
|
% calc mse
|
|
|
|
f_l = l*Wo*4/pi;
|
|
AmdB_l = max(AmdB_, parabolic_resonator(f_l, mask_sample_freqs_kHz) + AmdB(l));
|
|
hold on;
|
|
if l == 23
|
|
plot(mask_sample_freqs_kHz*1000, AmdB_l,'c');
|
|
end
|
|
hold off;
|
|
e(l) = sum((AmdB_l - AmdB) .^ 2);
|
|
%printf("l: %5d f_l: %4.3f e: %4.0f emin: %4.0f lmin: %5d\n", l, f_l, emin, lmin);
|
|
printf("l: %5d f_l: %4.3f e: %4.0f emin: %4.0f lmin: %5d\n", l, f_l, e(l), emin, lmin);
|
|
if e(l) < emin
|
|
emin = e(l);
|
|
lmin = l;
|
|
end
|
|
end
|
|
|
|
subplot(212)
|
|
plot(mask_sample_freqs_kHz*1000, e)
|
|
endfunction
|
|
|
|
|
|
function rate_K_surface_no_slope = remove_slope(rate_K_surface)
|
|
[frames K] = size(rate_K_surface);
|
|
rate_K_surface_no_slope = zeros(frames,K);
|
|
for f=1:frames
|
|
[gradient intercept] = linreg(1:K, rate_K_surface(f,:), K);
|
|
printf("f: %d gradient: %f intercept: %f\n", f, gradient, intercept);
|
|
rate_K_surface_no_slope(f,:) = rate_K_surface(f,:) - (intercept + gradient*(1:K));
|
|
end
|
|
endfunction
|
|
|