From d3c2783ca8966b0c91dd697619683769a05fd6dc Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Sun, 7 Apr 2019 03:14:49 -0400 Subject: [PATCH 01/10] Use features from Opus --- src/dump_data.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++ src/freq.c | 4 +-- src/freq.h | 4 +++ 3 files changed, 84 insertions(+), 3 deletions(-) diff --git a/src/dump_data.c b/src/dump_data.c index 33364c74..97aa2498 100644 --- a/src/dump_data.c +++ b/src/dump_data.c @@ -41,6 +41,49 @@ #include #include "lpcnet.h" #include "lpcnet_private.h" +#include "opus.h" + +void compute_band_energy_from_lpc(float *bandE, float g, const float *lpc) { + int i; + float sum[NB_BANDS] = {0}; + float x[WINDOW_SIZE]; + kiss_fft_cpx X[FREQ_SIZE]; + { + RNN_CLEAR(x, WINDOW_SIZE); + x[0] = 1; + for (i=0;ipcount*FRAME_SIZE], pcm, FRAME_SIZE); + if (st->pcount == 1 || st->pcount == 3) { + unsigned char bytes[100]; + short pcm_dec[320]; + float data[4][19]; + float bandE[4][NB_BANDS]; + int nb_bytes; + int nb_samples; + int pick; + nb_bytes = opus_encode(enc, &pcmbuf[(st->pcount-1)*FRAME_SIZE], 320, bytes, 100); + nb_samples = opus_decode(dec, bytes, nb_bytes, pcm_dec, 320, 0); + if (nb_samples != 320) break; + get_fdump(data); + for (i=0;i<4;i++) compute_band_energy_from_lpc(bandE[i], data[i][18], data[i]); + for (i=0;ifeatures[st->pcount-1][i]); + dct(st->features[st->pcount-1], bandE[0]); + dct(st->features[st->pcount] , bandE[2]); + st->features[st->pcount-1][0] -= 4; + st->features[st->pcount][0] -= 4; + pick = data[0][17] > data[1][17] ? 0 : 1; + st->features[st->pcount-1][36] = .02*(data[pick][16] - 100); + st->features[st->pcount-1][37] = data[pick][17] - .5; + pick = data[2][17] > data[3][17] ? 2 : 3; + st->features[st->pcount][36] = .02*(data[pick][16] - 100); + st->features[st->pcount][37] = data[pick][17] - .5; + //for (i=0;i<38;i++) printf("%f ", st->features[st->pcount-1][i]); + //for (i=0;i<38;i++) printf("%f ", st->features[st->pcount][i]); + //printf("%f %f %f %f %f\n", st->features[st->pcount-1][37], data[1][16], data[3][16], 100+50*st->features[st->pcount-1][36], 100+50*st->features[st->pcount][36]); + } if (fpcm) { compute_noise(&noisebuf[st->pcount*FRAME_SIZE], noise_std); } diff --git a/src/freq.c b/src/freq.c index c88d0715..9dce9401 100644 --- a/src/freq.c +++ b/src/freq.c @@ -40,9 +40,7 @@ #include "celt_lpc.h" #include -#define SQUARE(x) ((x)*(x)) - -static const opus_int16 eband5ms[] = { +const opus_int16 eband5ms[] = { /*0 200 400 600 800 1k 1.2 1.4 1.6 2k 2.4 2.8 3.2 4k 4.8 5.6 6.8 8k*/ 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 34, 40 }; diff --git a/src/freq.h b/src/freq.h index b4fbbf08..a24db288 100644 --- a/src/freq.h +++ b/src/freq.h @@ -43,6 +43,10 @@ #define NB_BANDS 18 #define NB_BANDS_1 (NB_BANDS - 1) +#define SQUARE(x) ((x)*(x)) + +extern const opus_int16 eband5ms[]; + void compute_band_energy(float *bandE, const kiss_fft_cpx *X); void compute_band_corr(float *bandE, const kiss_fft_cpx *X, const kiss_fft_cpx *P); From 8ba3c22f7f1d2773acdf6a8457f34e52bdb0b8f5 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Sun, 7 Apr 2019 03:46:42 -0400 Subject: [PATCH 02/10] Handling pre-emphasis --- src/dump_data.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/dump_data.c b/src/dump_data.c index 97aa2498..cfa6f9aa 100644 --- a/src/dump_data.c +++ b/src/dump_data.c @@ -43,6 +43,8 @@ #include "lpcnet_private.h" #include "opus.h" +float preemph_offset[NB_BANDS] = {1.772676, 2.937053, 0.278042, 0.299267, 0.126341, 0.060082, 0.019509, -0.017281, 0.000530, -0.000156, -0.007375, -0.010533, -0.002903, -0.005244, -0.003251, -0.000492, -0.000174, -0.004998}; + void compute_band_energy_from_lpc(float *bandE, float g, const float *lpc) { int i; float sum[NB_BANDS] = {0}; @@ -51,6 +53,7 @@ void compute_band_energy_from_lpc(float *bandE, float g, const float *lpc) { { RNN_CLEAR(x, WINDOW_SIZE); x[0] = 1; + //x[1] = -PREEMPHASIS; for (i=0;ipcount*FRAME_SIZE + i] = (1.f/32768.f)*x[i]; + preemphasis(x, &mem_preemph, x, PREEMPHASIS, FRAME_SIZE); for (i=0;ipcount-1)*FRAME_SIZE], 320, bytes, 100); + nb_bytes = opus_encode_float(enc, &xbuf[(st->pcount-1)*FRAME_SIZE], 320, bytes, 100); nb_samples = opus_decode(dec, bytes, nb_bytes, pcm_dec, 320, 0); if (nb_samples != 320) break; get_fdump(data); @@ -324,6 +330,8 @@ int main(int argc, char **argv) { dct(st->features[st->pcount] , bandE[2]); st->features[st->pcount-1][0] -= 4; st->features[st->pcount][0] -= 4; + for (i=0;ifeatures[st->pcount-1][i] -= preemph_offset[i]; + for (i=0;ifeatures[st->pcount][i] -= preemph_offset[i]; pick = data[0][17] > data[1][17] ? 0 : 1; st->features[st->pcount-1][36] = .02*(data[pick][16] - 100); st->features[st->pcount-1][37] = data[pick][17] - .5; From b02c399cd6896cce2238eba6faf6a5dfe98cdef4 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Sun, 7 Apr 2019 04:02:04 -0400 Subject: [PATCH 03/10] recompute LPC --- src/dump_data.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/dump_data.c b/src/dump_data.c index cfa6f9aa..e5c8825c 100644 --- a/src/dump_data.c +++ b/src/dump_data.c @@ -325,7 +325,7 @@ int main(int argc, char **argv) { for (i=0;i<4;i++) compute_band_energy_from_lpc(bandE[i], data[i][18], data[i]); for (i=0;ifeatures[st->pcount-1][i]); + //for (i=0;i<55;i++) printf("%f ", st->features[st->pcount-1][i]); dct(st->features[st->pcount-1], bandE[0]); dct(st->features[st->pcount] , bandE[2]); st->features[st->pcount-1][0] -= 4; @@ -338,8 +338,11 @@ int main(int argc, char **argv) { pick = data[2][17] > data[3][17] ? 2 : 3; st->features[st->pcount][36] = .02*(data[pick][16] - 100); st->features[st->pcount][37] = data[pick][17] - .5; - //for (i=0;i<38;i++) printf("%f ", st->features[st->pcount-1][i]); - //for (i=0;i<38;i++) printf("%f ", st->features[st->pcount][i]); + + lpc_from_cepstrum(&st->features[st->pcount-1][2*NB_BANDS+3], st->features[st->pcount-1]); + lpc_from_cepstrum(&st->features[st->pcount][2*NB_BANDS+3], st->features[st->pcount]); + //for (i=0;i<55;i++) printf("%f ", st->features[st->pcount-1][i]); + //for (i=0;i<55;i++) printf("%f ", st->features[st->pcount][i]); //printf("%f %f %f %f %f\n", st->features[st->pcount-1][37], data[1][16], data[3][16], 100+50*st->features[st->pcount-1][36], 100+50*st->features[st->pcount][36]); } if (fpcm) { From b4762602b98a605e100adaf2ba150f0c2d702c2d Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Sun, 7 Apr 2019 04:13:40 -0400 Subject: [PATCH 04/10] delay compensation --- src/dump_data.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/dump_data.c b/src/dump_data.c index e5c8825c..5ad21e3d 100644 --- a/src/dump_data.c +++ b/src/dump_data.c @@ -187,12 +187,15 @@ int main(int argc, char **argv) { int training = -1; int encode = 0; int decode = 0; + int delay = TRAINING_OFFSET; int quantize = 0; OpusEncoder *enc; OpusDecoder *dec; enc = opus_encoder_create(16000, 1, OPUS_APPLICATION_VOIP, NULL); opus_encoder_ctl(enc, OPUS_SET_BITRATE(6000)); opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_WIDEBAND)); + opus_encoder_ctl(enc, OPUS_GET_LOOKAHEAD(&delay)); + fprintf(stderr, "delay is %d\n", delay); dec = opus_decoder_create(16000, 1, NULL); st = lpcnet_encoder_create(); if (argc == 5 && strcmp(argv[1], "-train")==0) training = 1; @@ -306,7 +309,7 @@ int main(int argc, char **argv) { preemphasis(x, &mem_preemph, x, PREEMPHASIS, FRAME_SIZE); for (i=0;ipcount*FRAME_SIZE], pcm, FRAME_SIZE); @@ -338,7 +341,7 @@ int main(int argc, char **argv) { pick = data[2][17] > data[3][17] ? 2 : 3; st->features[st->pcount][36] = .02*(data[pick][16] - 100); st->features[st->pcount][37] = data[pick][17] - .5; - + lpc_from_cepstrum(&st->features[st->pcount-1][2*NB_BANDS+3], st->features[st->pcount-1]); lpc_from_cepstrum(&st->features[st->pcount][2*NB_BANDS+3], st->features[st->pcount]); //for (i=0;i<55;i++) printf("%f ", st->features[st->pcount-1][i]); @@ -351,13 +354,21 @@ int main(int argc, char **argv) { st->pcount++; /* Running on groups of 4 frames. */ if (st->pcount == 4) { +#if 0 unsigned char buf[8]; process_superframe(st, buf, ffeat, encode, quantize); - if (fpcm) write_audio(st, pcmbuf, noisebuf, fpcm); +#else + if (ffeat) { + for (i=0;i<4;i++) { + fwrite(st->features[i], sizeof(float), NB_TOTAL_FEATURES, ffeat); + } + } +#endif + if (fpcm) write_audio(st, pcmbuf, noisebuf, fpcm); st->pcount = 0; } //if (fpcm) fwrite(pcm, sizeof(short), FRAME_SIZE, fpcm); - for (i=0;i Date: Mon, 8 Apr 2019 12:48:53 -0400 Subject: [PATCH 05/10] cap the pitch --- src/lpcnet.c | 2 +- src/train_lpcnet.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/lpcnet.c b/src/lpcnet.c index 2c9dc1dc..23040770 100644 --- a/src/lpcnet.c +++ b/src/lpcnet.c @@ -129,7 +129,7 @@ LPCNET_EXPORT void lpcnet_synthesize(LPCNetState *lpcnet, const float *features, int pitch; float pitch_gain; /* Matches the Python code -- the 0.1 avoids rounding issues. */ - pitch = (int)floor(.1 + 50*features[36]+100); + pitch = IMIN(255, (int)floor(.1 + 50*features[36]+100)); pitch_gain = lpcnet->old_gain[FEATURES_DELAY-1]; memmove(&lpcnet->old_gain[1], &lpcnet->old_gain[0], (FEATURES_DELAY-1)*sizeof(lpcnet->old_gain[0])); lpcnet->old_gain[0] = features[PITCH_GAIN_FEATURE]; diff --git a/src/train_lpcnet.py b/src/train_lpcnet.py index 0b5e0ba2..4be73fee 100755 --- a/src/train_lpcnet.py +++ b/src/train_lpcnet.py @@ -95,6 +95,7 @@ periods = (.1 + 50*features[:,:,36:37]+100).astype('int16') +periods = np.minimum(periods, 255); in_data = np.concatenate([sig, pred, in_exc], axis=-1) @@ -103,7 +104,7 @@ del in_exc # dump models to disk as we go -checkpoint = ModelCheckpoint('lpcnet24g_384_10_G16_{epoch:02d}.h5') +checkpoint = ModelCheckpoint('lpcnet27a_384_10_G16_{epoch:02d}.h5') #Set this to True to adapt an existing model (e.g. on new data) adaptation = False @@ -121,4 +122,5 @@ decay = 5e-5 model.compile(optimizer=Adam(lr, amsgrad=True, decay=decay), loss='sparse_categorical_crossentropy') +model.save_weights('lpcnet27a_384_10_G16_00.h5'); model.fit([in_data, features, periods], out_exc, batch_size=batch_size, epochs=nb_epochs, validation_split=0.0, callbacks=[checkpoint, sparsify]) From 9ffb76d942cd7ee6cda2b459809923f1ace0ad82 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Tue, 9 Apr 2019 01:31:30 -0400 Subject: [PATCH 06/10] spectrum from synthesis --- src/dump_data.c | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/src/dump_data.c b/src/dump_data.c index 5ad21e3d..c926eaed 100644 --- a/src/dump_data.c +++ b/src/dump_data.c @@ -129,6 +129,7 @@ void write_audio(LPCNetEncState *st, const short *pcm, const int *noise, FILE *f float e; int j; for (j=0;jfeatures[k][2*NB_BANDS+3+j]*st->sig_mem[j]; + //printf("%f\n", pcm[k*FRAME_SIZE+i] - p); e = lin2ulaw(pcm[k*FRAME_SIZE+i] - p); /* Signal. */ data[4*i] = lin2ulaw(st->sig_mem[0]); @@ -195,6 +196,7 @@ int main(int argc, char **argv) { opus_encoder_ctl(enc, OPUS_SET_BITRATE(6000)); opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_WIDEBAND)); opus_encoder_ctl(enc, OPUS_GET_LOOKAHEAD(&delay)); + delay = 160; fprintf(stderr, "delay is %d\n", delay); dec = opus_decoder_create(16000, 1, NULL); st = lpcnet_encoder_create(); @@ -310,31 +312,37 @@ int main(int argc, char **argv) { for (i=0;ipcount*FRAME_SIZE], pcm, FRAME_SIZE); if (st->pcount == 1 || st->pcount == 3) { unsigned char bytes[100]; - short pcm_dec[320]; + float pcm_dec[320]; float data[4][19]; float bandE[4][NB_BANDS]; int nb_bytes; int nb_samples; int pick; + static float mem_preemph2 = 0; nb_bytes = opus_encode_float(enc, &xbuf[(st->pcount-1)*FRAME_SIZE], 320, bytes, 100); - nb_samples = opus_decode(dec, bytes, nb_bytes, pcm_dec, 320, 0); + nb_samples = opus_decode_float(dec, bytes, nb_bytes, pcm_dec, 320, 0); + preemphasis(pcm_dec, &mem_preemph2, pcm_dec, PREEMPHASIS, 2*FRAME_SIZE); if (nb_samples != 320) break; + for (i=0;i<320;i++) pcm_dec[i] *= 32768; + st->pcount--; + compute_frame_features(st, pcm_dec); + st->pcount++; + compute_frame_features(st, pcm_dec+160); get_fdump(data); +#if 1 for (i=0;i<4;i++) compute_band_energy_from_lpc(bandE[i], data[i][18], data[i]); - for (i=0;ifeatures[st->pcount-1][i]); - dct(st->features[st->pcount-1], bandE[0]); - dct(st->features[st->pcount] , bandE[2]); - st->features[st->pcount-1][0] -= 4; - st->features[st->pcount][0] -= 4; - for (i=0;ifeatures[st->pcount-1][i] -= preemph_offset[i]; - for (i=0;ifeatures[st->pcount][i] -= preemph_offset[i]; + for (i=0;ifeatures[st->pcount-1][NB_BANDS], bandE[0]); + dct(&st->features[st->pcount][NB_BANDS] , bandE[2]); + st->features[st->pcount-1][NB_BANDS] -= 4; + st->features[st->pcount][NB_BANDS] -= 4; +#endif pick = data[0][17] > data[1][17] ? 0 : 1; st->features[st->pcount-1][36] = .02*(data[pick][16] - 100); st->features[st->pcount-1][37] = data[pick][17] - .5; @@ -342,10 +350,11 @@ int main(int argc, char **argv) { st->features[st->pcount][36] = .02*(data[pick][16] - 100); st->features[st->pcount][37] = data[pick][17] - .5; - lpc_from_cepstrum(&st->features[st->pcount-1][2*NB_BANDS+3], st->features[st->pcount-1]); - lpc_from_cepstrum(&st->features[st->pcount][2*NB_BANDS+3], st->features[st->pcount]); + //lpc_from_cepstrum(&st->features[st->pcount-1][2*NB_BANDS+3], st->features[st->pcount-1]); + //lpc_from_cepstrum(&st->features[st->pcount][2*NB_BANDS+3], st->features[st->pcount]); //for (i=0;i<55;i++) printf("%f ", st->features[st->pcount-1][i]); //for (i=0;i<55;i++) printf("%f ", st->features[st->pcount][i]); + //printf("\n"); //printf("%f %f %f %f %f\n", st->features[st->pcount-1][37], data[1][16], data[3][16], 100+50*st->features[st->pcount-1][36], 100+50*st->features[st->pcount][36]); } if (fpcm) { From 9ede75e1296d9df4b68468ec5daac579e8384d59 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Tue, 9 Apr 2019 02:08:07 -0400 Subject: [PATCH 07/10] Fixing features --- src/dump_data.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/src/dump_data.c b/src/dump_data.c index c926eaed..b14744b1 100644 --- a/src/dump_data.c +++ b/src/dump_data.c @@ -85,7 +85,7 @@ void compute_band_energy_from_lpc(float *bandE, float g, const float *lpc) { { bandE[i] = sum[i]; } - for (i=0;i=0;i--) { + if (st->features[i][36] > -1.99) last_pitch = st->features[i][36]; + else st->features[i][36] = last_pitch; + } + last_pitch = st->features[3][36]; + RNN_COPY(ftemp, &st->features[3][0], 55); + for (i=3;i>=1;i--) { + RNN_COPY(&st->features[i][NB_BANDS], &st->features[i-1][NB_BANDS], NB_BANDS+2); + } + RNN_COPY(&st->features[0][NB_BANDS], &fmem[NB_BANDS], NB_BANDS+2); + RNN_COPY(fmem, ftemp, 55); for (i=0;i<4;i++) { - fwrite(st->features[i], sizeof(float), NB_TOTAL_FEATURES, ffeat); + int j; + for (j=0;jfeatures[i][NB_BANDS+j] -= st->features[i][j]; } + if (ffeat) { + for (i=0;i<4;i++) { + fwrite(st->features[i], sizeof(float), NB_TOTAL_FEATURES, ffeat); + } } #endif if (fpcm) write_audio(st, pcmbuf, noisebuf, fpcm); From c902650114be30a890cddf6e8d66761294b9217a Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Tue, 9 Apr 2019 11:58:11 -0400 Subject: [PATCH 08/10] use features 18..36 --- src/lpcnet.c | 3 +++ src/lpcnet_demo.c | 4 +++- src/train_lpcnet.py | 6 +++--- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/lpcnet.c b/src/lpcnet.c index 23040770..05824877 100644 --- a/src/lpcnet.c +++ b/src/lpcnet.c @@ -137,6 +137,9 @@ LPCNET_EXPORT void lpcnet_synthesize(LPCNetState *lpcnet, const float *features, memcpy(lpc, lpcnet->old_lpc[FEATURES_DELAY-1], LPC_ORDER*sizeof(lpc[0])); memmove(lpcnet->old_lpc[1], lpcnet->old_lpc[0], (FEATURES_DELAY-1)*LPC_ORDER*sizeof(lpc[0])); lpc_from_cepstrum(lpcnet->old_lpc[0], features); + //for (i=0;i<16;i++) printf("%f ", lpcnet->old_lpc[0][i]); + //printf("\n"); + if (lpcnet->frame_count <= FEATURES_DELAY) { RNN_CLEAR(output, N); diff --git a/src/lpcnet_demo.c b/src/lpcnet_demo.c index a838840b..1d44009a 100644 --- a/src/lpcnet_demo.c +++ b/src/lpcnet_demo.c @@ -109,13 +109,15 @@ int main(int argc, char **argv) { LPCNetState *net; net = lpcnet_create(); while (1) { + int i; float in_features[NB_TOTAL_FEATURES]; float features[NB_FEATURES]; short pcm[LPCNET_FRAME_SIZE]; fread(in_features, sizeof(features[0]), NB_TOTAL_FEATURES, fin); if (feof(fin)) break; RNN_COPY(features, in_features, NB_FEATURES); - RNN_CLEAR(&features[18], 18); + //for (i=0;i<16;i++) printf("%f ", in_features[NB_TOTAL_FEATURES-16+i]); + //RNN_CLEAR(&features[18], 18); lpcnet_synthesize(net, features, pcm, LPCNET_FRAME_SIZE); fwrite(pcm, sizeof(pcm[0]), LPCNET_FRAME_SIZE, fout); } diff --git a/src/train_lpcnet.py b/src/train_lpcnet.py index 4be73fee..7e625656 100755 --- a/src/train_lpcnet.py +++ b/src/train_lpcnet.py @@ -87,7 +87,7 @@ features = np.reshape(features, (nb_frames, feature_chunk_size, nb_features)) features = features[:, :, :nb_used_features] -features[:,:,18:36] = 0 +#features[:,:,18:36] = 0 fpad1 = np.concatenate([features[0:1, 0:2, :], features[:-1, -2:, :]], axis=0) fpad2 = np.concatenate([features[1:, :2, :], features[0:1, -2:, :]], axis=0) @@ -104,7 +104,7 @@ del in_exc # dump models to disk as we go -checkpoint = ModelCheckpoint('lpcnet27a_384_10_G16_{epoch:02d}.h5') +checkpoint = ModelCheckpoint('lpcnet27b_384_10_G16_{epoch:02d}.h5') #Set this to True to adapt an existing model (e.g. on new data) adaptation = False @@ -122,5 +122,5 @@ decay = 5e-5 model.compile(optimizer=Adam(lr, amsgrad=True, decay=decay), loss='sparse_categorical_crossentropy') -model.save_weights('lpcnet27a_384_10_G16_00.h5'); +model.save_weights('lpcnet27b_384_10_G16_00.h5'); model.fit([in_data, features, periods], out_exc, batch_size=batch_size, epochs=nb_epochs, validation_split=0.0, callbacks=[checkpoint, sparsify]) From fba7d54352f04313e470a94b8f3ce5301b2a26da Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Tue, 9 Apr 2019 14:06:08 -0400 Subject: [PATCH 09/10] feature set 29 --- src/dump_data.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/dump_data.c b/src/dump_data.c index b14744b1..5d1767d0 100644 --- a/src/dump_data.c +++ b/src/dump_data.c @@ -196,7 +196,7 @@ int main(int argc, char **argv) { opus_encoder_ctl(enc, OPUS_SET_BITRATE(6000)); opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_WIDEBAND)); opus_encoder_ctl(enc, OPUS_GET_LOOKAHEAD(&delay)); - delay = 160; + delay = 92+40; fprintf(stderr, "delay is %d\n", delay); dec = opus_decoder_create(16000, 1, NULL); st = lpcnet_encoder_create(); @@ -350,6 +350,9 @@ int main(int argc, char **argv) { st->features[st->pcount][36] = .02*(data[pick][16] - 100); st->features[st->pcount][37] = data[pick][17] - .5; + for (i=0;i<16;i++) st->features[st->pcount-1][39+i] = -data[0][i]; + for (i=0;i<16;i++) st->features[st->pcount][39+i] = -data[2][i]; + //lpc_from_cepstrum(&st->features[st->pcount-1][2*NB_BANDS+3], st->features[st->pcount-1]); //lpc_from_cepstrum(&st->features[st->pcount][2*NB_BANDS+3], st->features[st->pcount]); //for (i=0;i<55;i++) printf("%f ", st->features[st->pcount-1][i]); @@ -375,12 +378,14 @@ int main(int argc, char **argv) { else st->features[i][36] = last_pitch; } last_pitch = st->features[3][36]; +#if 0 RNN_COPY(ftemp, &st->features[3][0], 55); for (i=3;i>=1;i--) { RNN_COPY(&st->features[i][NB_BANDS], &st->features[i-1][NB_BANDS], NB_BANDS+2); } RNN_COPY(&st->features[0][NB_BANDS], &fmem[NB_BANDS], NB_BANDS+2); RNN_COPY(fmem, ftemp, 55); +#endif for (i=0;i<4;i++) { int j; for (j=0;jfeatures[i][NB_BANDS+j] -= st->features[i][j]; From d9b646b06e4a9d37be7b769e89cb95cdda565665 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Wed, 10 Apr 2019 10:52:43 -0400 Subject: [PATCH 10/10] For wavenet comparison --- src/dump_data.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/dump_data.c b/src/dump_data.c index 5d1767d0..8f46a301 100644 --- a/src/dump_data.c +++ b/src/dump_data.c @@ -122,6 +122,8 @@ void compute_noise(int *noise, float noise_std) { void write_audio(LPCNetEncState *st, const short *pcm, const int *noise, FILE *file) { int i, k; + fwrite(pcm, 4*FRAME_SIZE, 2, file); + return; for (k=0;k<4;k++) { unsigned char data[4*FRAME_SIZE]; for (i=0;isig_mem[0] = p + ulaw2lin(e); st->exc_mem = e; } - fwrite(data, 4*FRAME_SIZE, 1, file); + //fwrite(data, 4*FRAME_SIZE, 1, file); } } @@ -196,7 +198,7 @@ int main(int argc, char **argv) { opus_encoder_ctl(enc, OPUS_SET_BITRATE(6000)); opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_WIDEBAND)); opus_encoder_ctl(enc, OPUS_GET_LOOKAHEAD(&delay)); - delay = 92+40; + delay = 160; fprintf(stderr, "delay is %d\n", delay); dec = opus_decoder_create(16000, 1, NULL); st = lpcnet_encoder_create(); @@ -308,7 +310,7 @@ int main(int argc, char **argv) { } for (i=0;ipcount*FRAME_SIZE + i] = (1.f/32768.f)*x[i]; - preemphasis(x, &mem_preemph, x, PREEMPHASIS, FRAME_SIZE); + //preemphasis(x, &mem_preemph, x, PREEMPHASIS, FRAME_SIZE); for (i=0;ifeatures[i], sizeof(float), NB_TOTAL_FEATURES, ffeat); + fwrite(st->features[i], sizeof(float), 38, ffeat); } } #endif