You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1420 lines
38 KiB
1420 lines
38 KiB
% newamp2.m
|
|
%
|
|
% Copyright David Rowe 2018
|
|
% This program is distributed under the terms of the GNU General Public License
|
|
% Version 2
|
|
%
|
|
% Library of Octave functions to explore new ideas in amplitude
|
|
% (spectral envelope) modelling. See newamp2_fby (frame by frame
|
|
% analysis) and newamp2_batch (batch processing for listening tests)
|
|
%
|
|
|
|
1;
|
|
|
|
% --------------------------------------------------------------------------------
|
|
% Functions used by rate K mel work
|
|
% --------------------------------------------------------------------------------
|
|
|
|
function y = lanczos2(x)
|
|
y = sinc(x).*sinc(x/2);
|
|
endfunction
|
|
|
|
function y = interp_lanczos(xp, yp, xp_max, x)
|
|
|
|
y = zeros(1,length(x));
|
|
k = 1;
|
|
for i=1:length(x)
|
|
% find closest sample in xp just greater than xi
|
|
xi = x(i);
|
|
while (xp(k) <= xi) && (k < length(xp)-2)
|
|
k++;
|
|
end
|
|
|
|
% we'd like to use k-2 .. k+2, but we need to stay inside limits of xp
|
|
|
|
k_st = k - 2; k_st = max(1,k_st);
|
|
k_en = k + 2; k_en = min(length(xp),k_en);
|
|
% printf("i: %d xi: %f k: %d k_st: %d k_en: %d\n", i, xi, k, k_st, k_en);
|
|
|
|
% map frequencies to x in -2 ... + 2
|
|
|
|
delta = xp(2) - xp(1);
|
|
xl = (xp(k_st:k_en) - xi)/delta;
|
|
|
|
y(i) = lanczos2(xl) * yp(k_st:k_en)';
|
|
|
|
end
|
|
|
|
endfunction
|
|
|
|
|
|
% General 2nd order parabolic interpolator. Used splines orginally,
|
|
% but this is much simpler and we don't need much accuracy. Given two
|
|
% vectors of points xp and yp, find interpolated values y at points x
|
|
%
|
|
|
|
% If a point in x is less than the smallest point in xp, we linearly
|
|
% interpolate down to (0,0). If a point in x is greater than the
|
|
% greatest value in xp, we linearly interpolate down to (xp_max, 0)
|
|
|
|
function y = interp_para(xp, yp, xp_max, x)
|
|
assert( (length(xp) >=3) && (length(yp) >= 3) );
|
|
|
|
y = zeros(1,length(x));
|
|
k = 1;
|
|
for i=1:length(x)
|
|
xi = x(i);
|
|
|
|
% k is index into xp of where we start 3 points used to form parabola
|
|
|
|
while (xp(k) < xi) && (k < length(xp))
|
|
k++;
|
|
end
|
|
|
|
%printf("xi: %f k = %d\n", xi, k);
|
|
if k == 1
|
|
% linear interpolation at low end
|
|
x1 = 0; y1 = 0;
|
|
x2 = xp(k); y2 = yp(k);
|
|
b = (y2-y1)/(x2-x1);
|
|
y(i) = b*(xi-x2) + y2;
|
|
%printf("lin1 k: %d i: %d xi: %f x1: %f y1: %f\n", k, i, xi, x1, y1);
|
|
elseif k < length(xp)
|
|
% parabolic interpolation
|
|
x1 = xp(k-1); y1 = yp(k-1);
|
|
x2 = xp(k); y2 = yp(k);
|
|
x3 = xp(k+1); y3 = yp(k+1);
|
|
a = ((y3-y2)/(x3-x2)-(y2-y1)/(x2-x1))/(x3-x1);
|
|
b = ((y3-y2)/(x3-x2)*(x2-x1)+(y2-y1)/(x2-x1)*(x3-x2))/(x3-x1);
|
|
y(i) = a*(xi-x2)^2 + b*(xi-x2) + y2;
|
|
%printf("para1 k: %d i: %d xi: %f x1: %f y1: %f\n", k, i, xi, x1, y1);
|
|
elseif (k == length(xp)) && (xi < xp(k))
|
|
% parabolic interpolation, but shift xp points back by 1
|
|
x1 = xp(k-2); y1 = yp(k-2);
|
|
x2 = xp(k-1); y2 = yp(k-1);
|
|
x3 = xp(k); y3 = yp(k);
|
|
a = ((y3-y2)/(x3-x2)-(y2-y1)/(x2-x1))/(x3-x1);
|
|
b = ((y3-y2)/(x3-x2)*(x2-x1)+(y2-y1)/(x2-x1)*(x3-x2))/(x3-x1);
|
|
y(i) = a*(xi-x2)^2 + b*(xi-x2) + y2;
|
|
%printf("para2 k: %d i: %d xi: %f x1: %f y1: %f\n", k, i, xi, x1, y1);
|
|
elseif k == length(xp)
|
|
% linear interpolation at high end
|
|
x1 = xp(k); y1 = yp(k);
|
|
x2 = xp_max; y2 = 0;
|
|
b = (y2-y1)/(x2-x1);
|
|
y(i) = b*(xi-x1) + y1;
|
|
%printf("lin2 k: %d i: %d xi: %f x1: %f y1: %f\n", k, i, xi, x1, y1);
|
|
end
|
|
|
|
end
|
|
endfunction
|
|
|
|
|
|
#{
|
|
% choose largest sample in band, idea is we care more about finding
|
|
% peaks, can handle some error in frequency. x are non linear
|
|
% (arbitrary) sampling points in kHz
|
|
|
|
function y = interp_largest(f0_Hz, AmdB, x_kHz)
|
|
L = length(AmdB);
|
|
x = x_kHz*1000;
|
|
y = zeros(1,length(x));
|
|
bw = x(2) - x(1);
|
|
k = 1;
|
|
|
|
for i=1:length(x)
|
|
|
|
% determine limits of this band
|
|
|
|
if i>1
|
|
bw = x(i) - x(i-1);
|
|
end
|
|
band_low = x(i) - bw/2; band_high = x(i) + bw/2;
|
|
|
|
% map band limits to harmonics
|
|
|
|
if x(i) < f0_Hz
|
|
m_low = m_high = 1;
|
|
else
|
|
m_low = round(band_low/f0_Hz); m_high = round(band_high/f0_Hz)-1;
|
|
m_low = max(1, m_low); m_high = min(L, m_high); m_high = max(m_low, m_high);
|
|
end
|
|
|
|
printf("L: %d f0: %f i: %d band_low: %f band_high: %f m_low: %d m_high: %d\n",L, f0_Hz, i, band_low, band_high, m_low, m_high);
|
|
% find max in band
|
|
|
|
y(i) = max(AmdB(m_low:m_high));
|
|
end
|
|
|
|
endfunction
|
|
#}
|
|
|
|
% simple linear interpolator
|
|
|
|
function y = interp_linear(xp, yp, x)
|
|
assert( (length(xp) == 2) && (length(yp) == 2) );
|
|
|
|
m = (yp(2) - yp(1))/(xp(2) - xp(1));
|
|
c = yp(1) - m*xp(1);
|
|
|
|
y = zeros(1,length(x));
|
|
for i=1:length(x)
|
|
y(i) = m*x(i) + c;
|
|
end
|
|
endfunction
|
|
|
|
% quantise input sample to nearest value in table, optionally return binary code
|
|
% should work with vectors and weights too
|
|
|
|
function [quant_out best_i bits] = quantise(levels, quant_in, weights=1)
|
|
|
|
% if a scaler quantiser make it a col vector
|
|
if rows(levels) == 1
|
|
levels = levels';
|
|
end
|
|
|
|
% find closest quantiser level
|
|
|
|
best_se = 1E32;
|
|
for i=1:length(levels)
|
|
se = sum( ((levels(i,:) - quant_in).^2) .* weights );
|
|
if se < best_se
|
|
quant_out = levels(i,:);
|
|
best_se = se;
|
|
best_i = i;
|
|
end
|
|
end
|
|
|
|
% convert index to binary bits
|
|
|
|
bits = index_to_bits(best_i-1, ceil(log2(length(levels))));
|
|
|
|
endfunction
|
|
|
|
|
|
% Quantisation functions for Wo in log freq domain
|
|
|
|
function index = encode_log_Wo(Wo, bits)
|
|
Wo_levels = 2.^bits;
|
|
Wo_min = 2*pi/160;
|
|
Wo_max = 2*pi/20;
|
|
|
|
norm = (log10(Wo) - log10(Wo_min))/(log10(Wo_max) - log10(Wo_min));
|
|
index = floor(Wo_levels * norm + 0.5);
|
|
index = max(index, 0);
|
|
index = min(index, Wo_levels-1);
|
|
endfunction
|
|
|
|
function Wo = decode_log_Wo(index, bits)
|
|
Wo_levels = 2.^bits;
|
|
Wo_min = 2*pi/160;
|
|
Wo_max = 2*pi/20;
|
|
|
|
step = (log10(Wo_max) - log10(Wo_min))/Wo_levels;
|
|
Wo = log10(Wo_min) + step*index;
|
|
|
|
Wo = 10 .^ Wo;
|
|
endfunction
|
|
|
|
|
|
% convert index to binary bits
|
|
|
|
function bits = index_to_bits(value, numbits)
|
|
levels = 2.^numbits;
|
|
bits = zeros(1, numbits);
|
|
for b=1:numbits
|
|
bits(b) = bitand(value,2^(numbits-b)) != 0;
|
|
end
|
|
end
|
|
|
|
function value = bits_to_index(bits, numbits)
|
|
value = sum(2.^(numbits-1:-1:0) .* bits);
|
|
endfunction
|
|
|
|
|
|
% Determine a phase spectra from a magnitude spectra
|
|
% from http://www.dsprelated.com/showcode/20.php
|
|
% Haven't _quite_ figured out how this works but have to start somewhere ....
|
|
%
|
|
% TODO: we may be able to sample at a lower rate, like mWo
|
|
% but start with something that works
|
|
function [phase Gdbfk s Aw] = determine_phase(model, f, Nfft=512, ak)
|
|
Fs = 8000;
|
|
max_amp = 80;
|
|
L = min([model(f,2) max_amp-1]);
|
|
Wo = model(f,1);
|
|
|
|
sample_freqs_kHz = (Fs/1000)*[0:Nfft/2]/Nfft; % fft frequency grid (nonneg freqs)
|
|
Am = model(f,3:(L+3));
|
|
AmdB = 20*log10(Am);
|
|
rate_L_sample_freqs_kHz = (1:L)*Wo*4/pi;
|
|
Gdbfk = interp_lanczos(rate_L_sample_freqs_kHz, AmdB, Fs/(2*1000), sample_freqs_kHz);
|
|
|
|
% Gdbfk = resample_mask(model, f, mask_sample_freqs_kHz);
|
|
|
|
% optional input of aks for testing
|
|
|
|
if nargin == 4
|
|
Aw = 1 ./ fft(ak,Nfft);
|
|
Gdbfk = 20*log10(abs(Aw(1:Nfft/2+1)));
|
|
end
|
|
|
|
[phase s] = mag_to_phase(Gdbfk, Nfft);
|
|
|
|
endfunction
|
|
|
|
|
|
% Non linear sampling of frequency axis, reducing the "rate" is a
|
|
% first step before VQ
|
|
|
|
function mel = ftomel(fHz)
|
|
mel = floor(2595*log10(1+fHz/700)+0.5);
|
|
endfunction
|
|
|
|
|
|
function rate_K_sample_freqs_kHz = mel_sample_freqs_kHz(K, fstart_hz=100, fend_hz=0.95*4000)
|
|
mel_start = ftomel(fstart_hz); mel_end = ftomel(fend_hz);
|
|
step = (mel_end-mel_start)/(K-1);
|
|
mel = mel_start:step:mel_end;
|
|
rate_K_sample_freqs_Hz = 700*((10 .^ (mel/2595)) - 1);
|
|
rate_K_sample_freqs_kHz = rate_K_sample_freqs_Hz/1000;
|
|
endfunction
|
|
|
|
|
|
function plot_mel_sample_freqs(K, f_start_hz, f_end_hz)
|
|
rate_K_sample_freqs_kHz = mel_sample_freqs_kHz(K, f_start_hz, f_end_hz);
|
|
figure(1); clf;
|
|
plot(rate_K_sample_freqs_kHz,'+');
|
|
endfunction
|
|
|
|
|
|
function [rate_K_surface rate_K_sample_freqs_kHz] = resample_const_rate_f_mel(model, K, Fs=8000, interp_alg='lanc')
|
|
rate_K_sample_freqs_kHz = mel_sample_freqs_kHz(K, 100, 0.95*Fs/2);
|
|
rate_K_surface = resample_const_rate_f(model, rate_K_sample_freqs_kHz, Fs, interp_alg);
|
|
endfunction
|
|
|
|
|
|
% Resample Am from time-varying rate L=floor(pi/Wo) to fixed rate K. This can be viewed
|
|
% as a 3D surface with time, freq, and ampitude axis.
|
|
|
|
function [rate_K_surface rate_K_sample_freqs_kHz] = resample_const_rate_f(model, rate_K_sample_freqs_kHz, Fs, interp_alg='lanc')
|
|
|
|
% convert rate L=pi/Wo amplitude samples to fixed rate K
|
|
|
|
max_amp = 160;
|
|
[frames col] = size(model);
|
|
K = length(rate_K_sample_freqs_kHz);
|
|
rate_K_surface = zeros(frames, K);
|
|
|
|
for f=1:frames
|
|
Wo = model(f,1);
|
|
L = min([model(f,2) max_amp-1]);
|
|
Am = model(f,3:(L+2));
|
|
AmdB = 20*log10(Am);
|
|
|
|
clip_en = 0;
|
|
if clip_en
|
|
% clip between peak and peak -50dB, to reduce dynamic range
|
|
|
|
AmdB_peak = max(AmdB);
|
|
AmdB(find(AmdB < (AmdB_peak-50))) = AmdB_peak-50;
|
|
end
|
|
|
|
rate_L_sample_freqs_kHz = (1:L)*Wo*Fs/(2000*pi);
|
|
|
|
%rate_K_surface(f,:) = interp1([0 rate_L_sample_freqs_kHz (Fs/2000)], [AmdB(1) AmdB AmdB(L)], rate_K_sample_freqs_kHz, "spline");
|
|
if strcmp(interp_alg, 'para')
|
|
rate_K_surface(f,:) = interp_para(rate_L_sample_freqs_kHz, AmdB, Fs/(2*1000), rate_K_sample_freqs_kHz);
|
|
end
|
|
if strcmp(interp_alg, 'lanc')
|
|
rate_K_surface(f,:) = interp_lanczos(rate_L_sample_freqs_kHz, AmdB, Fs/(2*1000), rate_K_sample_freqs_kHz);
|
|
end
|
|
|
|
% equalise energy betweerate L and rate K samples. This was required for interframe decimation/interpolation
|
|
% when Wo is jumping about, e.g. UV frames
|
|
|
|
energy_L = sum(Am.^2);
|
|
rate_K_vec_lin = 10 .^ (rate_K_surface(f,:)/20);
|
|
energy_K = sum(rate_K_vec_lin .^ 2);
|
|
g = sqrt(energy_L/energy_K);
|
|
rate_K_surface(f,:) += 20*log10(g);
|
|
%printf("%d\n", f);
|
|
end
|
|
%printf("\n");
|
|
endfunction
|
|
|
|
|
|
% Take a rate K surface and convert back to time varying rate L
|
|
|
|
function [model_ AmdB_] = resample_rate_L(model, rate_K_surface, rate_K_sample_freqs_kHz, Fs=8000, interp_alg='lanc')
|
|
max_amp = 160; K = columns(rate_K_surface);
|
|
[frames col] = size(model);
|
|
|
|
AmdB_ = zeros(frames, max_amp);
|
|
model_ = zeros(frames, max_amp+2);
|
|
for f=1:frames
|
|
Wo = model(f,1);
|
|
L = model(f,2);
|
|
rate_L_sample_freqs_kHz = (1:L)*Wo*Fs/(2000*pi);
|
|
|
|
% back down to rate L
|
|
|
|
% dealing with end effects is an ongoing issue.....need a better solution
|
|
|
|
if strcmp(interp_alg, 'para')
|
|
AmdB_(f,1:L) = interp_para(rate_K_sample_freqs_kHz, rate_K_surface(f,:), Fs/(2*1000), rate_L_sample_freqs_kHz);
|
|
end
|
|
if strcmp(interp_alg, 'lanc')
|
|
AmdB_(f,1:L) = interp_lanczos(rate_K_sample_freqs_kHz, rate_K_surface(f,:), Fs/(2*1000), rate_L_sample_freqs_kHz);
|
|
end
|
|
if strcmp(interp_alg, 'lancmel')
|
|
rate_K_sample_freqs_mel = ftomel(rate_K_sample_freqs_kHz*1000);
|
|
rate_L_sample_freqs_mel = ftomel(rate_L_sample_freqs_kHz*1000);
|
|
AmdB_(f,1:L) = interp_lanczos(rate_K_sample_freqs_mel, rate_K_surface(f,:), Fs/(2*1000), rate_L_sample_freqs_mel);
|
|
end
|
|
|
|
% equalise energy
|
|
|
|
Am_ = 10 .^ (AmdB_(f, 1:L)/20);
|
|
energy_L = sum(Am_.^2);
|
|
rate_K_vec_lin = 10 .^ (rate_K_surface(f,:)/20);
|
|
energy_K = sum(rate_K_vec_lin .^ 2);
|
|
g = sqrt(energy_K/energy_L);
|
|
Am_ *= g;
|
|
|
|
model_(f,1) = Wo; model_(f,2) = L; model_(f,3:(L+2)) = Am_;
|
|
end
|
|
endfunction
|
|
|
|
|
|
% Decoder side interpolation of Wo and voicing, to go from 25 Hz
|
|
% sample rate used over channel to 100Hz internal sample rate of Codec
|
|
% 2.
|
|
|
|
function [Wo_ voicing_] = interp_Wo_v(Wo1, Wo2, voicing1, voicing2)
|
|
M = 4;
|
|
max_amp = 80;
|
|
|
|
Wo_ = zeros(1,M);
|
|
voicing_ = zeros(1,M);
|
|
if !voicing1 && !voicing2
|
|
Wo_(1:M) = 2*pi/100;
|
|
end
|
|
|
|
if voicing1 && !voicing2
|
|
Wo_(1:M/2) = Wo1;
|
|
Wo_(M/2+1:M) = 2*pi/100;
|
|
voicing_(1:M/2) = 1;
|
|
end
|
|
|
|
if !voicing1 && voicing2
|
|
Wo_(1:M/2) = 2*pi/100;
|
|
Wo_(M/2+1:M) = Wo2;
|
|
voicing_(M/2+1:M) = 1;
|
|
end
|
|
|
|
if voicing1 && voicing2
|
|
Wo_samples = [Wo1 Wo2];
|
|
Wo_(1:M) = interp_linear([1 M+1], Wo_samples, 1:M);
|
|
voicing_(1:M) = 1;
|
|
end
|
|
|
|
#{
|
|
printf("f: %d f+M/2: %d Wo: %f %f (%f %%) v: %d %d \n", f, f+M/2, model(f,1), model(f+M/2,1), 100*abs(model(f,1) - model(f+M/2,1))/model(f,1), voicing(f), voicing(f+M/2));
|
|
for i=f:f+M/2-1
|
|
printf(" f: %d v: %d v_: %d Wo: %f Wo_: %f\n", i, voicing(i), voicing_(i), model(i,1), model_(i,1));
|
|
end
|
|
#}
|
|
endfunction
|
|
|
|
|
|
function [rate_K_vec_corrected orig_error error nasty_error_log nasty_error_m_log] = correct_rate_K_vec(rate_K_vec, rate_K_sample_freqs_kHz, AmdB, AmdB_, K, Wo, L, Fs)
|
|
|
|
% aliasing correction --------------------------------------
|
|
|
|
% The mel sample rate decreases as frequency increases. Look for
|
|
% any regions above 1000Hz where we have missed definition of a
|
|
% spectral peak (formant) due to aliasing. Adjust the rate K
|
|
% sample levels to restore peaks. Theory is that correct
|
|
% definition of a formant is less important than the frequency of
|
|
% the formant. As long as we define a formant in that general
|
|
% frequency area it will sound OK.
|
|
|
|
Am_freqs_kHz = (1:L)*Wo*Fs/(2000*pi);
|
|
|
|
% Lets see where we have made an error
|
|
|
|
error = orig_error = AmdB(1:L) - AmdB_(1:L);
|
|
|
|
Ncorrections = 3; % maximum number of rate K samples to correct
|
|
error_thresh = 3; % only worry about errors larger than thresh
|
|
|
|
start_m = floor(L*1000/(Fs/2));
|
|
error(1:start_m) = 0; % first 1000Hz is densly sampled so ignore
|
|
nasty_error_m_log = []; nasty_error_log = [];
|
|
|
|
|
|
rate_K_vec_corrected = rate_K_vec;
|
|
for i=1:Ncorrections
|
|
[mx mx_m] = max(error);
|
|
|
|
if mx > error_thresh
|
|
nasty_error_log = [nasty_error_log mx];
|
|
nasty_error_m_log = [nasty_error_m_log mx_m];
|
|
|
|
% find closest rate K sample to nasty error
|
|
|
|
nasty_error_freq = mx_m*Wo*Fs/(2*pi*1000);
|
|
[tmp closest_k] = min(abs(rate_K_sample_freqs_kHz - nasty_error_freq));
|
|
rate_K_vec_corrected(closest_k) = AmdB(mx_m);
|
|
|
|
% zero out error in this region and look for another large error region
|
|
|
|
k = max(1, closest_k-1);
|
|
rate_K_prev_sample_kHz = rate_K_sample_freqs_kHz(k);
|
|
k = min(K, closest_k+1);
|
|
rate_K_next_sample_kHz = rate_K_sample_freqs_kHz(k);
|
|
|
|
[tmp st_m] = min(abs(Am_freqs_kHz - rate_K_prev_sample_kHz));
|
|
[tmp en_m] = min(abs(Am_freqs_kHz - rate_K_next_sample_kHz));
|
|
if closest_k == K
|
|
en_m = L;
|
|
end
|
|
error(st_m:en_m) = 0;
|
|
end
|
|
end
|
|
endfunction
|
|
|
|
|
|
% Given a vector of rate K samples, deltaf encodes/decodes delta
|
|
% amplitude using a huffman encoder, returning quantised samples and
|
|
% bit stream
|
|
|
|
function [rate_K_vec_ bits] = deltaf_quantise_rate_K_huff(rate_K_vec, E, nbits_max)
|
|
K = length(rate_K_vec);
|
|
nbits_remaining = nbits_max;
|
|
|
|
% start with k=3, around 250Hz, we assume that's quantised as the
|
|
% mean frame energy, as samples before that might be stuck in the
|
|
% HPF.
|
|
|
|
rate_K_vec_ = zeros(1,K);
|
|
rate_K_vec_(3) = E;
|
|
|
|
% encoding of differences
|
|
|
|
levels = [0 6 -6 -12 12]; symbols = {[0 0],[1 0],[1 1],[0 1 0;],[0 1 1]};
|
|
|
|
% this is pretty coarse (note no 0dB level) but sounds OK
|
|
|
|
%levels = [-6 +6 -12 +12]; symbols = {[0 0],[1 0],[1 1],[0 1]};
|
|
|
|
% move backwards to get target for first two samples
|
|
|
|
bits = [];
|
|
for m=2:-1:1
|
|
target = rate_K_vec(m) - rate_K_vec_(m+1);
|
|
[target_ best_i] = quantise(levels, target);
|
|
bits = [bits symbols{best_i}];
|
|
nbits_remaining -= length(symbols{best_i});
|
|
rate_K_vec_(m) = rate_K_vec_(m+1) + target_;
|
|
end
|
|
|
|
% then forwards for rest of the target samples
|
|
|
|
for m=4:K
|
|
target = rate_K_vec(m) - rate_K_vec_(m-1);
|
|
if nbits_remaining >= 3
|
|
[target_ best_i] = quantise(levels, target);
|
|
bits = [bits symbols{best_i}];
|
|
nbits_remaining -= length(symbols{best_i});
|
|
elseif nbits_remaining == 2
|
|
[target_ best_i] = quantise(levels(1:3), target);
|
|
bits = [bits symbols{best_i}];
|
|
nbits_remaining = 0;
|
|
elseif nbits_remaining < 2
|
|
target_ = -6; % if we've run out of bits just tail off
|
|
end
|
|
rate_K_vec_(m) = target_ + rate_K_vec_(m-1);
|
|
%printf("m: %d length: %d nbits_remaining: %d\n", m, length(bits), nbits_remaining);
|
|
end
|
|
|
|
bits = [bits zeros(1,nbits_remaining)];
|
|
endfunction
|
|
|
|
|
|
% Given a vector of bits, and the k=3 frame amplitude sample E, decode to a
|
|
% rate K vector of huffman encoded quantised spectral amplitude samples
|
|
|
|
function rate_K_vec_ = deltaf_decode_rate_K_huff(bits, E, K)
|
|
|
|
% start with k=3, around 250Hz, we assume that's quantised as the
|
|
% mean frame energy, as samples before that might be stuck in the
|
|
% HPF.
|
|
|
|
rate_K_vec_ = zeros(1,K);
|
|
rate_K_vec_(3) = E;
|
|
|
|
% move backwards to get target for first two samples
|
|
|
|
for m=2:-1:1
|
|
[target_ bits] = deltaf_dec_one_symbol(bits);
|
|
rate_K_vec_(m) = rate_K_vec_(m+1) + target_;
|
|
end
|
|
|
|
% then forwards for rest of the target samples
|
|
|
|
for m=4:K
|
|
if length(bits) >= 2
|
|
[target_ bits] = deltaf_dec_one_symbol(bits);
|
|
else
|
|
target_ = -6; % if we've run out of bits just tail off
|
|
end
|
|
rate_K_vec_(m) = target_ + rate_K_vec_(m-1);
|
|
%printf("m: %d nbits_remaining: %d\n", m, length(bits));
|
|
end
|
|
endfunction
|
|
|
|
|
|
% decode one symbol and truncate bits array
|
|
|
|
function [target_ bits] = deltaf_dec_one_symbol(bits)
|
|
levels = [0 6 -6 -12 12]; symbols = {[0 0],[1 0],[1 1],[0 1 0;],[0 1 1]};
|
|
|
|
two_bits = bits(1:2);
|
|
|
|
if two_bits == [0 1]
|
|
if length(bits) > 2
|
|
% OK a three bit code
|
|
if bits(3) == 0
|
|
target_ = levels(4);
|
|
else
|
|
target_ = levels(5);
|
|
end
|
|
bits = bits(4:end);
|
|
else
|
|
# we must have a bit error
|
|
target_ = levels(5);
|
|
bits = [];
|
|
end
|
|
else
|
|
if two_bits == [0 0]
|
|
target_ = levels(1);
|
|
end
|
|
if two_bits == [1 0]
|
|
target_ = levels(2);
|
|
end
|
|
if two_bits == [1 1]
|
|
target_ = levels(3);
|
|
end
|
|
bits = bits(3:end);
|
|
end
|
|
endfunction
|
|
|
|
|
|
function rate_K_vec_ = dct_quantise_rate_K(rate_K_vec)
|
|
K = length(rate_K_vec);
|
|
D = dct(rate_K_vec);
|
|
printf("\n");
|
|
D_ = 8*round(D/8);
|
|
D_(1) = D(1);
|
|
for d=1:K
|
|
printf("%4d",round(D(d)/8));
|
|
end
|
|
rate_K_vec_ = idct(D_);
|
|
endfunction
|
|
|
|
|
|
function un(sl)
|
|
U = unique(sl,"rows")
|
|
cnt = [];
|
|
for v=1:length(U)
|
|
sm = sum(ismember(sl,U(v,:),"rows"));
|
|
cnt = [cnt sm];
|
|
end
|
|
s = sort(cnt, "descend");
|
|
figure(1); clf; subplot(211); plot(s); subplot(212); plot(cumsum(s));
|
|
endfunction
|
|
|
|
|
|
% Joint Wo and LPC energy vector quantiser developed by Jean-Marc Valin.
|
|
% Octave port of functions in quantise.c
|
|
|
|
function w = compute_weights2(x, xp)
|
|
w(1) = 30;
|
|
w(2) = 1;
|
|
if x(2) < 0
|
|
w(1) *= 0.6;
|
|
w(2) *= 0.3;
|
|
end
|
|
if x(2) < -10
|
|
w(1) *= 0.3;
|
|
w(2) *= 0.3;
|
|
end
|
|
|
|
% Higher weight if pitch is stable
|
|
|
|
if abs(x(1)-xp(1)) < 0.2
|
|
w(1) *= 2;
|
|
w(2) *= 1.5;
|
|
elseif abs(x(1)-xp(1)) > 0.5
|
|
% Lower if not stable
|
|
w(1) *= 0.5;
|
|
end
|
|
|
|
% Lower weight for low energy
|
|
|
|
if x(2) < xp(2) - 10
|
|
w(2) *= 0.5;
|
|
end
|
|
if x(2) < xp(2) - 20
|
|
w(2) *= .5;
|
|
end
|
|
|
|
% Square the weights because it's applied on the squared error
|
|
|
|
w(1) *= w(1);
|
|
w(2) *= w(2);
|
|
endfunction
|
|
|
|
|
|
function [Wo_ E_ xq] = quantise_WoE(Wo, E, xq, vq)
|
|
ge_coeff = [0.8 0.9];
|
|
|
|
% VQ is only trained for Fs = 8000 Hz
|
|
|
|
Fs = 8000;
|
|
Fo_min = 50; Fo_max = 400;
|
|
P_min = Fs/Fo_max; P_max = Fs/Fo_min;
|
|
Wo_min = 2*pi/P_max;
|
|
Wo_max = 2*pi/P_min;
|
|
|
|
E = max(1,E);
|
|
|
|
x(1) = log10(Wo/Wo_min)/log10(2);
|
|
x(2) = 10.0*log10(1e-4 + E);
|
|
|
|
w = compute_weights2(x, xq);
|
|
w = [30 1];
|
|
err = x - ge_coeff .* xq;
|
|
err_ = quantise(vq, err, w);
|
|
%err_ = err;
|
|
xq = ge_coeff .* xq + err_;
|
|
|
|
#{
|
|
x = log2(4000*Wo/(PI*50));
|
|
2^x = 4000*Wo/(PI*50)
|
|
Wo = (2^x)*(PI*50)/4000;
|
|
#}
|
|
|
|
Wo_ = (2 ^ xq(1))*Wo_min;
|
|
|
|
Wo_ = min(Wo_max,Wo_);
|
|
Wo_ = max(Wo_min,Wo_);
|
|
|
|
E_ = 10.0 ^ (xq(2)/10.0);
|
|
E_ = max(1,E_);
|
|
endfunction
|
|
|
|
|
|
% Find distance of each vector in codebook using gain shape search
|
|
% Uses discrete values, in unit range
|
|
|
|
function dist_gain_shape(codebook, target)
|
|
[entries K] = size(codebook);
|
|
dist = zeros(entries,1);
|
|
best = zeros(entries,K);
|
|
for i=1:entries
|
|
min_dist = 1000;
|
|
for g=-10:20
|
|
adist = sum(abs(target - (codebook(i,:) + g)));
|
|
if adist < min_dist
|
|
min_dist = adist;
|
|
min_g = g;
|
|
end
|
|
end
|
|
dist(i) = min_dist;
|
|
best(i,:) = codebook(i,:) + min_g;
|
|
end
|
|
figure(1); clf; plot(dist);
|
|
axis([1 entries 0 K])
|
|
target
|
|
[tmp min_i] = min(dist);
|
|
target - best(min_i,:)
|
|
endfunction
|
|
|
|
|
|
% As above but without the gain
|
|
|
|
function dist_shape(codebook, target)
|
|
[entries K] = size(codebook);
|
|
dist = zeros(entries,1);
|
|
for i=1:entries
|
|
dist(i) = sum(abs(target - (codebook(i,:))));
|
|
end
|
|
figure(1); clf; plot(dist);
|
|
axis([1 entries 0 K])
|
|
target
|
|
[tmp min_i] = min(dist);
|
|
target - codebook(min_i,:)
|
|
endfunction
|
|
|
|
|
|
% Given a vector of rate K samples, deltaf encodes/decodes delta
|
|
% amplitude using a fixed bit/sample allocation, returning quantised
|
|
% samples and bit stream
|
|
|
|
function [rate_K_vec_ bits] = deltaf_quantise_rate_K_fixed(rate_K_vec, E, nbits_max)
|
|
K = length(rate_K_vec);
|
|
|
|
% start with k=3, around 250Hz, we assume that's quantised as the
|
|
% mean frame energy, as samples before that might be stuck in the
|
|
% HPF.
|
|
|
|
rate_K_vec_ = zeros(1,K);
|
|
rate_K_vec_(3) = E;
|
|
|
|
% encoding of differences
|
|
|
|
levels_2bit = [-3 +3 -9 9];
|
|
levels_3bit = [0 -6 +6 -12 +12 -18 +18 -24];
|
|
|
|
% move backwards to get target for first two samples
|
|
|
|
bits = [];
|
|
for m=2:-1:1
|
|
target = rate_K_vec(m) - rate_K_vec_(m+1);
|
|
[target_ best_i] = quantise(levels_2bit, target);
|
|
% printf("m: %d target_ %f best_i: %d\n", m, target_, best_i);
|
|
bits = [bits index_to_bits(best_i-1, 2)];
|
|
rate_K_vec_(m) = rate_K_vec_(m+1) + target_;
|
|
end
|
|
|
|
% then forwards for rest of the target samples
|
|
|
|
for m=4:9
|
|
target = rate_K_vec(m) - rate_K_vec_(m-1);
|
|
[target_ best_i] = quantise(levels_3bit, target);
|
|
bits = [bits index_to_bits(best_i-1, 3)];
|
|
rate_K_vec_(m) = target_ + rate_K_vec_(m-1);
|
|
end
|
|
for m=10:K
|
|
target = rate_K_vec(m) - rate_K_vec_(m-1);
|
|
[target_ best_i] = quantise(levels_2bit, target);
|
|
bits = [bits index_to_bits(best_i-1, 2)];
|
|
rate_K_vec_(m) = target_ + rate_K_vec_(m-1);
|
|
end
|
|
assert(length(bits) == nbits_max);
|
|
endfunction
|
|
|
|
|
|
% Given a vector of bits, and the k=3 frame amplitude sample E, decode
|
|
% to a rate K vector of encoded quantised spectral amplitude samples
|
|
% with fixed bit allocation
|
|
|
|
function rate_K_vec_ = deltaf_decode_rate_K_fixed(bits, E, K)
|
|
|
|
% start with k=3, around 250Hz, we assume that's quantised as the
|
|
% mean frame energy, as samples before that might be stuck in the
|
|
% HPF.
|
|
|
|
rate_K_vec_ = zeros(1,K);
|
|
rate_K_vec_(3) = E;
|
|
|
|
levels_2bit = [-3 +3 -9 9];
|
|
levels_3bit = [0 -6 +6 -12 +12 -18 +18 -24];
|
|
|
|
% move backwards to get target for first two samples
|
|
|
|
for m=2:-1:1
|
|
index = bits_to_index(bits(1:2),2) + 1;
|
|
target_ = levels_2bit(index);
|
|
rate_K_vec_(m) = rate_K_vec_(m+1) + target_;
|
|
bits = bits(3:end);
|
|
end
|
|
|
|
% then forwards for rest of the target samples
|
|
|
|
for m=4:9
|
|
index = bits_to_index(bits(1:3), 3) + 1;
|
|
target_ = levels_3bit(index);
|
|
rate_K_vec_(m) = rate_K_vec_(m-1) + target_;
|
|
bits = bits(4:end);
|
|
end
|
|
for m=10:K
|
|
index = bits_to_index(bits(1:2), 2) + 1;
|
|
target_ = levels_2bit(index);
|
|
rate_K_vec_(m) = rate_K_vec_(m-1) + target_;
|
|
if m != K
|
|
bits = bits(3:end);
|
|
end
|
|
end
|
|
|
|
endfunction
|
|
|
|
|
|
#{
|
|
DCT coeff scalar quantiser, analysis stage. Plots PDFs, returns a
|
|
vector of quantiser levels.
|
|
|
|
Input is matrix of K columns row-vectors that represent one frame of
|
|
spectrum at rate K.
|
|
#}
|
|
|
|
function quantiser_levels = build_dct_quantiser(train_surf, qstepdB=1)
|
|
[nr K] = size(train_surf);
|
|
|
|
% remove low energy rows
|
|
|
|
m = mean(train_surf');
|
|
figure(4); plot(m);
|
|
ind = find(m>10);
|
|
train_surf = train_surf(ind,:);
|
|
nr2 = length(ind);
|
|
%nr2 = nr;
|
|
printf("K: %d nr: %d nr2: %d\n", K, nr, nr2);
|
|
|
|
D = dct(train_surf')';
|
|
|
|
figure(1); clf;
|
|
plot(std(D));
|
|
title('Std Dev of each DCT coeff');
|
|
|
|
figure(2); clf;
|
|
nr = ceil(sqrt(K)); nc = ceil(K/nr);
|
|
|
|
Tbits = 0; quantiser_levels = [];
|
|
|
|
for k=1:K
|
|
subplot(nr, nc, k)
|
|
v = D(:,k);
|
|
printf("k: %d mean %5.2f std: %5.2f min: %5.2f max: %5.2f\n", k, mean(v), std(v), min(v), max(v));
|
|
q_max = mean(v)+2*std(v); q_min = mean(v)-2*std(v);
|
|
levels = q_min:qstepdB:q_max;
|
|
nlevels = length(levels);
|
|
Tbits += log2(nlevels);
|
|
printf(" quantiser: min: %4.2f max: %4.2f nlevels: %d bits: %2.1f\n", q_min, q_max, nlevels, log2(nlevels));
|
|
|
|
% limit for histogram
|
|
|
|
v = min(v, q_max);
|
|
v = max(v, q_min);
|
|
[nn xx] = hist(v,50);
|
|
bar (xx, nn)
|
|
|
|
v_ = zeros(nr2,1);
|
|
for r=1:nr2
|
|
v_(r) = quantise(levels, v(r));
|
|
end
|
|
|
|
E(:,k) = v_;
|
|
|
|
quantiser_levels = [quantiser_levels; q_min q_max];
|
|
end
|
|
|
|
train_surf_ = idct(E')';
|
|
error = train_surf_ - train_surf;
|
|
mse = mean(mean(error .^ 2));
|
|
figure(3)
|
|
mesh(error(1:1000,:))
|
|
printf("mse: %4.2f Tbits: %d\n", mse, Tbits);
|
|
endfunction
|
|
|
|
|
|
function bits = bits_for_this_symbol(symbols, s)
|
|
% bits/sym for top 5, assume rest 5 bits
|
|
% 11 10 00 010 0110 0111
|
|
bps = [ 2 2 2 3 4 4];
|
|
|
|
ind = find(symbols == s);
|
|
if ind <= length(bps)
|
|
bits = bps(ind);
|
|
else
|
|
bits = 5;
|
|
end
|
|
endfunction
|
|
|
|
|
|
function [surf_ D E] = dct_quantiser(surf, quantiser_levels, method=1, qstepdB=1, limit=0)
|
|
[nr K] = size(surf);
|
|
|
|
printf("K: %d nr: %d\n", K, nr);
|
|
|
|
% clamp lower limit of mean to 10dB
|
|
#{
|
|
m = mean(surf')';
|
|
surf -= m;
|
|
m = min(10,m);
|
|
surf += m;
|
|
#}
|
|
|
|
D = dct(surf')';
|
|
|
|
Tbits = 0;
|
|
|
|
if method == 1
|
|
for k=1:K
|
|
q_min = quantiser_levels(k,1); q_max = quantiser_levels(k,2);
|
|
levels = q_min:qstepdB:q_max;
|
|
nlevels = length(levels);
|
|
if nlevels == 1
|
|
levels = (q_max + q_min)/2;
|
|
end
|
|
if nlevels == 2
|
|
m = (q_max + q_min)/2;
|
|
levels = [(m - qstepdB/2) (m + qstepdB/2)];
|
|
end
|
|
if nlevels == 3
|
|
m = (q_max + q_min)/2;
|
|
levels = [q_min m q_max];
|
|
end
|
|
Tbits += log2(nlevels);
|
|
printf(" quantiser: min: %4.2f max: %4.2f nlevels: %d bits: %2.1f\n", q_min, q_max, nlevels, log2(nlevels));
|
|
|
|
v_ = zeros(nr,1);
|
|
v = D(:,k);
|
|
for r=1:nr
|
|
v_(r) = quantise(levels, v(r));
|
|
end
|
|
|
|
E(:,k) = v_;
|
|
end
|
|
end
|
|
|
|
if method == 2
|
|
% quantise
|
|
E = round(D/qstepdB);
|
|
% count symbols
|
|
symbols = []; count = [];
|
|
[nr nc]= size(E);
|
|
if limit
|
|
nc = limit-1;
|
|
end
|
|
for r=1:nr
|
|
for c=2:nc
|
|
s = E(r,c);
|
|
ind = find(symbols == s);
|
|
if length(ind)
|
|
count(ind)++;
|
|
else
|
|
symbols = [symbols s];
|
|
count(length(symbols)) = 1;
|
|
end
|
|
end
|
|
end
|
|
|
|
% sort into order
|
|
|
|
[count ind] = sort(count, "descend");
|
|
symbols = symbols(ind);
|
|
|
|
% estimate bits/symbol by huffman coding direct and differences. Clever part is we
|
|
% choose method based on min bits, which I guess costs an extra bit. It's clever
|
|
% because we need direct for quick transitions, but during steady voiced parts the
|
|
% changes are small so coding differences does a good job.
|
|
|
|
Tbits = 0; Nsyms = 0;
|
|
Nbits_direct_log = Nbits_diff_log = Nbits_log = zeros(1,nr);
|
|
|
|
for r=3:nr
|
|
Nbits_direct = 0; Nbits_diff = 0;
|
|
for c=2:nc
|
|
s_direct = E(r,c);
|
|
s_diff = E(r,c) - E(r-2,c);
|
|
Nbits_direct += bits_for_this_symbol(symbols, s_direct);
|
|
Nbits_diff += bits_for_this_symbol(symbols, s_diff);
|
|
Nsyms++;
|
|
end
|
|
Nbits_direct_log(r) += Nbits_direct;
|
|
Nbits_diff_log(r) += Nbits_diff;
|
|
Nbits_log(r) += min(Nbits_direct, Nbits_diff);
|
|
|
|
Tbits += min(Nbits_direct, Nbits_diff);
|
|
end
|
|
figure(5); clf; plot(Nbits_direct_log(3:end),'b');
|
|
smooth4 = conv(Nbits_log(3:end),[1 0 1 0 1 0 1 0])/4;
|
|
hold on; plot(Nbits_diff_log(3:end),'g'); plot(Nbits_log(3:end),'r'); hold off;
|
|
figure(6); clf; plot(smooth4,'m');
|
|
|
|
ent = 0;
|
|
for i=1:length(symbols)
|
|
wi = count(i)/sum(count);
|
|
printf("%2d %4d %6d %4.3f %4.3f\n", i, symbols(i), count(i), wi, -wi*log2(wi));
|
|
ent += -wi*log2(wi);
|
|
end
|
|
printf("mean bits/frame: %3.1f mean bits/sym: %3.2f entropy: %3.2f bits\n", mean(Nbits_log), Tbits/Nsyms, ent);
|
|
if limit
|
|
E(:, limit:end) = 0;
|
|
end
|
|
E *= qstepdB;
|
|
end
|
|
|
|
surf_ = idct(E')';
|
|
error = surf_ - surf;
|
|
mse = mean(mean(error .^ 2));
|
|
figure(3)
|
|
[nr nc] = size(error);
|
|
nr = min(nr,100);
|
|
mesh(error(1:nr,:))
|
|
figure(4)
|
|
mesh(D(25:50,1:10))
|
|
printf("mse: %4.2f Tbits: %d\n", mse, Tbits);
|
|
endfunction
|
|
|
|
|
|
# Generate a huffman code from a matrix (surface) of spectral
|
|
# magnitudes. The Huffman code can be used for encoding the DCT of the
|
|
# rows of the surface (mag samples of each frame).
|
|
#
|
|
# Set qstepdB to the quanisation step size, e.g. 3 or 6dB is about where we can
|
|
# notice the effect of quantisation of the DCTs (and spectral magnitides)
|
|
#
|
|
# Set "max_dcts" to max number of dcts coeffs you will quantise, as this affects
|
|
# probabilities (we get many zeros in high order coeffs)
|
|
#
|
|
# octave:49> newamp2; p_table = design_huffman_enc(all_surf(:,2:35), 6, 18);
|
|
|
|
function [symbols huff] = design_huffman_enc(surf, qstepdB=6, max_dcts=18)
|
|
[nr K] = size(surf);
|
|
|
|
printf("K: %d nr: %d qstepdB: %3.2f\n", K, nr, qstepdB);
|
|
|
|
D = dct(surf')';
|
|
|
|
% quantise to step size in dB
|
|
|
|
E = round(D/qstepdB);
|
|
|
|
% count symbols, ignoring first (DC) coeff which we will scalar quantise
|
|
|
|
symbols = []; count = [];
|
|
[nr nc]= size(E);
|
|
if max_dcts
|
|
nc = max_dcts+1;
|
|
printf("cols 2 to %d (%d total)\n", nc, nc-2+1);
|
|
end
|
|
for r=1:nr
|
|
for c=2:nc
|
|
s = E(r,c);
|
|
ind = find(symbols == s);
|
|
if length(ind)
|
|
count(ind)++;
|
|
else
|
|
symbols = [symbols s];
|
|
count(length(symbols)) = 1;
|
|
end
|
|
end
|
|
end
|
|
|
|
% sort into order
|
|
|
|
[count ind] = sort(count, "descend");
|
|
symbols = symbols(ind);
|
|
|
|
Nsymbols = sum(count);
|
|
printf("Nsymbols = %d\n", Nsymbols);
|
|
|
|
% estimate entropy
|
|
|
|
H = 0;
|
|
p_table = [];
|
|
printf(" i symb count prob wi\n");
|
|
for i=1:length(symbols)
|
|
wi = count(i)/Nsymbols; p_table = [p_table wi];
|
|
printf("%2d %4d %6d %4.3f %4.3f\n", i, symbols(i), count(i), wi, -wi*log2(wi));
|
|
H += -wi*log2(wi);
|
|
end
|
|
|
|
% design Huffman code
|
|
|
|
huff = huffmandict (1, p_table, 1);
|
|
L = 0;
|
|
for i=1:length(huff)
|
|
L += p_table(i)*length(huff{i});
|
|
end
|
|
|
|
printf("Entropy: %3.2f bits/symbol Huffman code: %3.2f bits/symbol\n", H, L);
|
|
endfunction
|
|
|
|
|
|
# Huffman encodes (and decodes) a symbol, if input symbols is out of
|
|
# range of quantiser we choose nearest symbol
|
|
|
|
function [s_ bits] = huffman_enc_symb(symbols, huff, s)
|
|
min_dist = 1E32; ind = 1;
|
|
for i=1:length(symbols)
|
|
dist = (symbols(i) - s) .^ 2;
|
|
if dist < min_dist
|
|
ind = i;
|
|
min_dist = dist;
|
|
end
|
|
end
|
|
|
|
s_ = symbols(ind);
|
|
bits = huff{ind};
|
|
endfunction
|
|
|
|
|
|
% Huffman decode a bit stream of symbols. Terminates list if we get a
|
|
% bit error in decode
|
|
|
|
function [s_ error_flag] = huffman_decode_bitstream(symbols, huff, bits)
|
|
error_flag = 0;
|
|
min_cw_length = length(huff{1});
|
|
match = 1;
|
|
s_ = [];
|
|
|
|
while ((length(bits) >= min_cw_length) && match)
|
|
|
|
% search through list of codes to find a match
|
|
|
|
match = 0;
|
|
for i=1:length(symbols)
|
|
cw = huff{i}; lcw = length(cw);
|
|
if (length(bits) >= lcw) & !match
|
|
match = isequal(cw, bits(1:lcw));
|
|
if match
|
|
s_ = [s_ symbols(i)];
|
|
bits = bits(lcw+1:end);
|
|
end
|
|
end
|
|
end
|
|
|
|
% if no match found, say due to bit error, we drop out of loop
|
|
end
|
|
|
|
if (length(bits) > min_cw_length) && !match
|
|
error_flag = 1;
|
|
end
|
|
endfunction
|
|
|
|
|
|
% Huffman encodes (and decodes) the DCTS of a surface, except first (DCT) coeff
|
|
|
|
function [surf_ dc bits_surf] = huffman_encode_surf(surf, qstepdB=6, max_dcts=18, max_bits=100, symbols, huff)
|
|
dec = 2;
|
|
[nr K] = size(surf);
|
|
|
|
% allow room for direct/diff bit
|
|
|
|
max_bits_huff = max_bits - 1;
|
|
|
|
printf("K: %d nr: %d qstepdB: %3.2f max_dcts: %d\n", K, nr, qstepdB, max_dcts);
|
|
|
|
% limit num DCTs we encode (nc) to less than K to save bits, high
|
|
% order DCTs tend to be small
|
|
|
|
nc = K;
|
|
if max_dcts
|
|
nc = max_dcts+1;
|
|
printf("cols 2 to %d (%d total)\n", nc, nc-2+1);
|
|
end
|
|
|
|
% DCT and initial quantisation to step size
|
|
|
|
D = dct(surf')';
|
|
E = D/qstepdB;
|
|
dc = E(:,1);
|
|
|
|
% bit stream for each row (frame) is stored in a cell array
|
|
|
|
bits_surf = cell(nr,1);
|
|
Nbits_log = Nbits_direct_log = Nbits_diff_log = zeros(1,nr);
|
|
E_ = zeros(nr,K); prev_row = zeros(1,nc);
|
|
Tbits = Nsyms = 0;
|
|
E_dec = zeros(nr,K);
|
|
|
|
% encode each row
|
|
|
|
for r=1:dec:nr
|
|
bits_direct_row = []; bits_diff_row = []; Nbits_direct = 0; Nbits_diff = 0;
|
|
|
|
% DC just copied directly, quantised externally
|
|
|
|
E_(r,1) = E(r,1); E_dec(r,1) = E(r,1);
|
|
|
|
direct_row_ = diff_row_ = zeros(1,nc);
|
|
ndir_row = ndiff_row = 0; len_bits_direct_row = len_bits_diff_row = 0;
|
|
|
|
for c=2:nc
|
|
|
|
% try direct quantisation
|
|
|
|
if len_bits_direct_row < max_bits_huff
|
|
s_direct = E(r,c);
|
|
[s_direct_ bits_direct] = huffman_enc_symb(symbols, huff, s_direct);
|
|
if len_bits_direct_row + length(bits_direct) <= max_bits_huff
|
|
% can we squeeze in bits for latest symbol?
|
|
bits_direct_row = [bits_direct_row bits_direct];
|
|
direct_row_(c) = s_direct_;
|
|
ndir_row++;
|
|
len_bits_direct_row = len_bits_direct_row + length(bits_direct);
|
|
else
|
|
% can't fit any more symbols? Then signal we are finished
|
|
len_bits_direct_row = max_bits_huff;
|
|
end
|
|
end
|
|
|
|
% try differential quantisation
|
|
|
|
if len_bits_diff_row < max_bits
|
|
s_diff = E(r,c) - prev_row(c);
|
|
[s_diff_ bits_diff] = huffman_enc_symb(symbols, huff, s_diff);
|
|
if len_bits_diff_row + length(bits_diff) <= max_bits_huff
|
|
bits_diff_row = [bits_diff_row bits_diff];
|
|
diff_row_(c) = s_diff_;
|
|
ndiff_row++;
|
|
len_bits_diff_row = len_bits_diff_row + length(bits_diff);
|
|
else
|
|
len_bits_diff_row = max_bits_huff;
|
|
end
|
|
end
|
|
|
|
Nsyms++;
|
|
end
|
|
|
|
% choose quant method with the least number of bits
|
|
|
|
Nbits_direct = length(bits_direct_row); Nbits_diff = length(bits_diff_row);
|
|
Nbits_direct_log(r) = Nbits_direct; Nbits_diff_log(r) = Nbits_diff;
|
|
e_direct = sum((direct_row_(2:nc) - E(r,2:nc)) .^ 2);
|
|
e_diff = sum((diff_row_(2:nc) + prev_row(2:nc) - E(r,2:nc)) .^ 2);
|
|
|
|
%if Nbits_direct < Nbits_diff
|
|
if e_direct <= e_diff
|
|
bits_surf{r} = [1 bits_direct_row];
|
|
Tbits += Nbits_direct;
|
|
E_(r,2:nc) = direct_row_(2:nc);
|
|
Nbits_log(r) = Nbits_direct; diff_flag(r) = 0;
|
|
else
|
|
bits_surf{r} = [0 bits_diff_row];
|
|
Tbits += Nbits_diff;
|
|
E_(r,2:nc) = diff_row_(2:nc) + prev_row(2:nc);
|
|
Nbits_log(r) = Nbits_diff; diff_flag(r) = 1;
|
|
end
|
|
|
|
% pad out to max_bits
|
|
|
|
bits_surf{r} = [bits_surf{r} zeros(1,max_bits - length(bits_surf{r}))];
|
|
|
|
% test huffman bitstream decoder
|
|
|
|
bits_dec = bits_surf{r};
|
|
[s_dec error_flag] = huffman_decode_bitstream(symbols, huff, bits_dec(2:end));
|
|
% printf("r: %d bits_dec(1): %d l: %d error_flag: %d\n", r, bits_dec(1), length(s_dec), error_flag);
|
|
row_dec = zeros(1,nc);
|
|
row_dec(2:length(s_dec)+1) = s_dec;
|
|
if bits_dec(1)
|
|
E_dec(r,2:nc) = row_dec(2:nc);
|
|
else
|
|
E_dec(r,2:nc) = row_dec(2:nc) + prev_row(2:nc);
|
|
end
|
|
assert(E_dec(r,2:nc) == E_(r,2:nc));
|
|
|
|
% update memory - note we use quantised symbols as that's what we have at decoder
|
|
|
|
if r >=3
|
|
prev_row = E_(r,1:nc);
|
|
|
|
% if we are decimating, interpolate DCTs to get original frame rate
|
|
|
|
if dec == 2
|
|
E_(r-1,:) = 0.5*E_(r-2,:) + 0.5*E_(r,:);
|
|
end
|
|
|
|
end
|
|
|
|
end
|
|
|
|
% transform back to surface and calculate MSE
|
|
|
|
E_ *= qstepdB;
|
|
surf_ = idct(E_')';
|
|
|
|
error = surf_ - surf;
|
|
mse = mean(mean(error(1:dec:end,:) .^ 2));
|
|
|
|
figure(1); clf;
|
|
[nr nc] = size(error);
|
|
nr = min(nr,300);
|
|
mesh(error(1:dec:nr,:))
|
|
|
|
figure(2);
|
|
subplot(122,"position",[0.7 0.05 0.25 0.85])
|
|
hist(mean(error(1:dec:end,:).^2,2));
|
|
subplot(121,"position",[0.1 0.05 0.5 0.85])
|
|
plot(mean(error(1:dec:end,:).^2,2));
|
|
title('Mean squared error per frame');
|
|
|
|
figure(3);
|
|
subplot(122,"position",[0.7 0.05 0.25 0.9])
|
|
hist(Nbits_log(1:dec:end));
|
|
subplot(121,"position",[0.1 0.05 0.5 0.9])
|
|
plot(diff_flag(1:dec:end)*10,'b;diff flag;'); hold on; plot(Nbits_log(1:dec:end),'g;Nbits/fr;'); hold off;
|
|
|
|
printf("mse: %4.2f dB^2 mean bits/frame: %3.1f mean bits/sym: %3.1f\n", mse, mean(Nbits_log(1:dec:end)), Tbits/Nsyms);
|
|
endfunction
|
|
|
|
|
|
% Returns a quantised surface from matrix of bist streams for each frame
|
|
|
|
function surf_ = huffman_decode_surf(K=34, qstepdB=6, max_dcts=18, symbols, huff, bits_surf, dc)
|
|
dec = 2;
|
|
[nr tmp] = size(bits_surf);
|
|
|
|
printf("K: %d nr: %d qstepdB: %3.2f max_dcts: %d\n", K, nr, qstepdB, max_dcts);
|
|
|
|
% limit num DCTs we encode (nc) to less than K to save bits, high
|
|
% order DCTs tend to be small
|
|
|
|
nc = K;
|
|
if max_dcts
|
|
nc = max_dcts+1;
|
|
printf("cols 2 to %d (%d total)\n", nc, nc-2+1);
|
|
end
|
|
|
|
prev_row = zeros(1,nc);
|
|
Tbits = Nsyms = 0;
|
|
E_dec = zeros(nr,K);
|
|
|
|
% decode each row
|
|
|
|
for r=1:dec:nr
|
|
bits_direct_row = []; bits_diff_row = []; Nbits_direct = 0; Nbits_diff = 0;
|
|
|
|
% DC is quantised externally
|
|
|
|
E_dec(r,1) = dc(r);
|
|
|
|
% huffman bitstream decoder
|
|
|
|
bits_dec = bits_surf{r};
|
|
[s_dec error_flag] = huffman_decode_bitstream(symbols, huff, bits_dec(2:end));
|
|
row_dec = zeros(1,nc);
|
|
row_dec(2:length(s_dec)+1) = s_dec;
|
|
if bits_dec(1)
|
|
E_dec(r,2:nc) = row_dec(2:nc);
|
|
else
|
|
E_dec(r,2:nc) = row_dec(2:nc) + prev_row(2:nc);
|
|
end
|
|
|
|
% update memory for diff decoder
|
|
|
|
if r >=3
|
|
prev_row = E_dec(r,1:nc);
|
|
|
|
% if we are decimating, interpolate DCTs to get original frame rate
|
|
|
|
if dec == 2
|
|
E_dec(r-1,:) = 0.5*E_dec(r-2,:) + 0.5*E_dec(r,:);
|
|
end
|
|
end
|
|
|
|
end
|
|
|
|
% transform back to surface and calculate MSE
|
|
|
|
E_dec *= qstepdB;
|
|
surf_ = idct(E_dec')';
|
|
endfunction
|
|
|
|
|