You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

246 lines
7.1 KiB

% tnewamp1.m
%
% Copyright David Rowe 2017
% This program is distributed under the terms of the GNU General Public License
% Version 2
#{
Octave script to compare Octave and C versions of newamp1 processing, in order to test C port.
c2sim -> dump files -> $ ../build_linux/unittest/tnewamp1 -> octave:1> tnewamp1
Usage:
1/ build codec2 with -DDUMP - see codec2-dev/README
2/ Generate dump files using c2sim (just need to do this once)
$ cd codec2-dev/build_linux/src
$ ./c2sim ../../raw/hts1a.raw --phase0 --postfilter --dump hts1a --lpc 10 --dump_pitch_e hts1a_pitche.txt
3/ Run C version which generates a file of Octave test vectors as output:
$ cd codec2-dev/build_linux/unittest
$ ./tnewamp1 ../../raw/hts1a
4/ Run Octave script to generate Octave test vectors and compare with C.
octave:1> tnewamp1("../build_linux/src/hts1a")
5/ Optionally listen to output
~/codec2-dev/build_linux/src$ ./c2sim ../../raw/hts1a.raw --phase0 --postfilter \
--amread hts1a_am.out --hmread hts1a_hm.out \
--Woread hts1a_Wo.out --hand_voicing hts1a_v.txt -o - \
| play -q -t raw -r 8000 -s -2 -
#}
function tnewamp1(input_prefix)
  % Runs the Octave newamp1 processing on a c2sim dump, compares each stage
  % against the test vectors written by the C unit test, plots the phase
  % surfaces, and writes the decoded model to disk for c2sim.
  %
  % input_prefix: path prefix of the dump files.  Reads
  %                 <input_prefix>_model.txt   (required)
  %                 <input_prefix>_pitche.txt  (optional, column 3 = voicing)
  %               and writes
  %                 <input_prefix>_am.out, _Wo.out, _aw.out, _hm.out, _v.txt
  %
  % The C test vectors are always loaded from
  % ../build_linux/unittest/tnewamp1_out.txt, relative to the current directory.

  if nargin < 1
    error("tnewamp1: usage: tnewamp1(input_prefix)");
  end

  % these scripts are run for the functions they provide to the code below
  newamp_700c;
  autotest;
  more off;

  max_amp = 80;
  postfilter = 0;   % optional postfilter that runs on Am, not used atm
  synth_phase = 1;  % not referenced in this function

  model_name = strcat(input_prefix,"_model.txt");
  model = load(model_name);
  frames = size(model, 1);

  % voicing defaults to all-unvoiced when no pitch/energy dump is present
  voicing_name = strcat(input_prefix,"_pitche.txt");
  voicing = zeros(1,frames);
  if exist(voicing_name, "file") == 2
    pitche = load(voicing_name);
    voicing = pitche(:, 3);
  end

  % Load in C vectors and compare -----------------------------------------

  % defines the *_c variables (rate_K_surface_c, mean_c, model__c, H_c, ...)
  load("../build_linux/unittest/tnewamp1_out.txt");

  K = 20;

  % from here on the frame count is that of the C test vectors
  frames = size(rate_K_surface_c, 1);
  if frames > size(model, 1)
    error("tnewamp1: C test vectors have %d frames but %s has only %d", ...
          frames, model_name, size(model, 1));
  end
  [rate_K_surface sample_freqs_kHz] = resample_const_rate_f_mel(model(1:frames,:), K);

  melvq;
  load train_120_1.txt; load train_120_2.txt;
  train_120_vq(:,:,1)= train_120_1; train_120_vq(:,:,2)= train_120_2;
  m = 5;  % passed to mbest() as its third argument

  % remove the per-frame mean before VQ
  mean_f = zeros(1, frames);
  rate_K_surface_no_mean = zeros(size(rate_K_surface));
  for f=1:frames
    mean_f(f) = mean(rate_K_surface(f,:));
    rate_K_surface_no_mean(f,:) = rate_K_surface(f,:) - mean_f(f);
  end

  [res rate_K_surface_no_mean_ ind] = mbest(train_120_vq, rate_K_surface_no_mean, m);

  for f=1:frames
    rate_K_surface_no_mean_(f,:) = post_filter(rate_K_surface_no_mean_(f,:), sample_freqs_kHz, 1.5);
  end

  rate_K_surface_ = zeros(frames, K);
  interpolated_surface_ = zeros(frames, K);
  energy_q = create_energy_q;
  M = 4;

  % quantise the mean and add it back to the quantised surface
  for f=1:frames
    [mean_f_ indx] = quantise(energy_q, mean_f(f));
    indexes(f,3) = indx - 1;
    rate_K_surface_(f,:) = rate_K_surface_no_mean_(f,:) + mean_f_;
  end

  % simulated decoder
  % break into segments of M frames.  We have 2 samples spaced M apart
  % and interpolate the rest.

  Nfft_phase = 128; % note this needs to be 512 (FFT_ENC in codec2 if using --awread)
                    % with --hmread 128 is preferred as less memory/CPU
  model_ = zeros(frames, max_amp+2);
  voicing_ = zeros(1,frames);
  Aw = zeros(frames, Nfft_phase);
  H = zeros(frames, max_amp);
  model_(1,1) = Wo_left = 2*pi/100;
  voicing_left = 0;
  left_vec = zeros(1,K);

  % decoder runs on every M-th frame, 25Hz frame rate, offset at
  % start is to minimise processing delay (thanks Jeroen!)

  for f=M:M:frames

    % quantise Wo on voiced frames, otherwise use the fixed default
    if voicing(f)
      index = encode_log_Wo(model(f,1), 6);
      if index == 0
        index = 1;
      end
      model_(f,1) = decode_log_Wo(index, 6);
    else
      model_(f,1) = 2*pi/100;
    end

    Wo_right = model_(f,1);
    voicing_right = voicing(f);
    [Wo_ avoicing_] = interp_Wo_v(Wo_left, Wo_right, voicing_left, voicing_right);

    % debug trace of the interpolated Wo/voicing and first few surface values
    for i=1:4
      fprintf(stderr, " Wo: %4.3f L: %d v: %d\n", Wo_(i), floor(pi/Wo_(i)), avoicing_(i));
    end
    fprintf(stderr," rate_K_vec: ");
    for i=1:5
      fprintf(stderr,"%5.3f ", rate_K_surface_(f,i));
    end
    fprintf(stderr,"\n");

    if f > M
      model_(f-M:f-1,1) = Wo_;
      voicing_(f-M:f-1) = avoicing_;
      model_(f-M:f-1,2) = floor(pi ./ model_(f-M:f-1,1)); % calculate L for each interpolated Wo
    end

    right_vec = rate_K_surface_(f,:);

    if f > M
      % linearly interpolate the rate K surface across the M-frame segment
      sample_points = [f-M f];
      resample_points = f-M:f-1;
      for k=1:K
        interpolated_surface_(resample_points,k) = interp_linear(sample_points, [left_vec(k) right_vec(k)], resample_points);
      end

      for k=f-M:f-1
        model_(k,:) = resample_rate_L(model_(k,:), interpolated_surface_(k,:), sample_freqs_kHz);
        Aw(k,:) = determine_phase(model_, k, Nfft_phase);
        % separate loop index so the mbest parameter m is not clobbered
        for harm=1:model_(k,2)
          b = round(harm*model_(k,1)*Nfft_phase/(2*pi)); % map harmonic centre to DFT bin
          H(k,harm) = exp(j*Aw(k, b+1));
        end
      end
    end

    % update for next time

    Wo_left = Wo_right;
    voicing_left = voicing_right;
    left_vec = right_vec;

  end

  figure(1); clf;
  mesh(angle(H));
  figure(2); clf;
  mesh(angle(H_c(:,1:max_amp)));
  figure(3); clf;
  mesh(abs(H - H_c(:,1:max_amp)));

  check(rate_K_surface, rate_K_surface_c, 'rate_K_surface', 0.01);
  check(mean_f, mean_c, 'mean', 0.01);
  check(rate_K_surface_, rate_K_surface__c, 'rate_K_surface_', 0.01);
  check(interpolated_surface_, interpolated_surface__c, 'interpolated_surface_', 0.01);
  check(model_(:,1), model__c(:,1), 'interpolated Wo_', 0.001);
  check(voicing_, voicing__c, 'interpolated voicing');
  check(model_(:,3:max_amp+2), model__c(:,3:max_amp+2), 'rate L Am surface ', 0.1);
  check(H, H_c(:,1:max_amp), 'phase surface');

  % Save to disk to check synthesis is OK with c2sim

  output_prefix = input_prefix;
  Am_out_name = sprintf("%s_am.out", output_prefix);
  Wo_out_name = sprintf("%s_Wo.out", output_prefix);
  Aw_out_name = sprintf("%s_aw.out", output_prefix);
  Hm_out_name = sprintf("%s_hm.out", output_prefix);

  % track every handle opened so far so that all of them are closed even
  % if a later open or write fails
  fids = [];
  unwind_protect
    fam = open_output_file(Am_out_name, "wb"); fids(end+1) = fam;
    fWo = open_output_file(Wo_out_name, "wb"); fids(end+1) = fWo;
    faw = open_output_file(Aw_out_name, "wb"); fids(end+1) = faw;
    fhm = open_output_file(Hm_out_name, "wb"); fids(end+1) = fhm;

    printf("Generating files for c2sim: ");
    for f=1:frames
      printf(".");
      Wo = model_(f,1);
      L = min([model_(f,2) max_amp-1]);
      Am = model_(f,3:(L+2));

      % NOTE(review): only the first L-1 magnitudes are copied here while
      % the Hm loop below writes L harmonics - confirm this is intended
      Am_ = zeros(1,2*max_amp);
      Am_(2:L) = Am(1:L-1);

      fwrite(fam, Am_, "float32");
      fwrite(fWo, Wo, "float32");

      % Note we send opposite phase as c2sim expects phase of LPC
      % analysis filter, just a convention based on historical
      % development of Codec 2

      % interleaved real/imag pairs
      Aw1 = zeros(1, Nfft_phase*2);
      Aw1(1:2:Nfft_phase*2) = cos(Aw(f,:));
      Aw1(2:2:Nfft_phase*2) = -sin(Aw(f,:));
      fwrite(faw, Aw1, "float32");

      % interleaved real/imag pairs, harmonic h stored at elements 2h+1, 2h+2
      Hm = zeros(1, 2*2*max_amp);
      for harm=1:L
        Hm(2*harm+1) = real(H(f,harm));
        Hm(2*harm+2) = imag(H(f,harm));
      end
      fwrite(fhm, Hm, "float32");
    end
  unwind_protect_cleanup
    for fid = fids
      fclose(fid);
    end
  end_unwind_protect

  % voicing decisions from the C test vectors, one per line
  v_out_name = sprintf("%s_v.txt", output_prefix);
  fv = open_output_file(v_out_name,"wt");
  unwind_protect
    for f=1:length(voicing__c)
      fprintf(fv,"%d\n", voicing__c(f));
    end
  unwind_protect_cleanup
    fclose(fv);
  end_unwind_protect

  printf("\n")

endfunction


% Opens file_name with the given fopen mode and returns the file id; raises
% an error carrying the system message if the file cannot be opened.
function fid = open_output_file(file_name, mode)
  [fid, msg] = fopen(file_name, mode);
  if fid == -1
    error("tnewamp1: cannot open %s for writing: %s", file_name, msg);
  end
endfunction