Skip to content

Commit 3b68a48

Browse files
committed
Add LPC-based tone detection
Detecting tones can help us prevent the encoder from getting confused by them.
1 parent 148ccd8 commit 3b68a48

File tree

3 files changed

+170
-6
lines changed

3 files changed

+170
-6
lines changed

celt/celt_encoder.c

Lines changed: 162 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,11 @@
5252
#include "vq.h"
5353

5454

55+
#ifndef M_PI
56+
#define M_PI 3.141592653
57+
#endif
58+
59+
5560
/** Encoder state
5661
@brief Encoder state
5762
*/
@@ -226,7 +231,7 @@ void opus_custom_encoder_destroy(CELTEncoder *st)
226231

227232
static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int C,
228233
opus_val16 *tf_estimate, int *tf_chan, int allow_weak_transients,
229-
int *weak_transient)
234+
int *weak_transient, opus_val16 tone_freq, opus_val32 toneishness)
230235
{
231236
int i;
232237
VARDECL(opus_val16, tmp);
@@ -399,6 +404,10 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
399404
}
400405
}
401406
is_transient = mask_metric>200;
407+
/* Prevent the transient detector from confusing the partial cycle of a
408+
very low frequency tone with a transient. */
409+
if (toneishness > QCONST32(.98f, 29) && tone_freq < QCONST16(0.026f, 13))
410+
is_transient = 0;
402411
/* For low bitrates, define "weak transients" that need to be
403412
handled differently to avoid partial collapse. */
404413
if (allow_weak_transients && is_transient && mask_metric<600) {
@@ -1184,9 +1193,135 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16
11841193
return maxDepth;
11851194
}
11861195

1196+
#ifdef FIXED_POINT
1197+
void normalize_tone_input(opus_val16 *x, int len) {
1198+
opus_val32 ac0=len;
1199+
int i;
1200+
int shift;
1201+
for (i=0;i<len;i++) {
1202+
ac0 = ADD32(ac0, SHR32(MULT16_16(x[i], x[i]), 10));
1203+
}
1204+
shift = 5 - (28-celt_ilog2(ac0))/2;
1205+
if (shift > 0) {
1206+
for (i=0;i<len;i++) {
1207+
x[i] = PSHR32(x[i], shift);
1208+
}
1209+
}
1210+
}
1211+
int acos_approx(opus_val32 x) {
1212+
opus_val16 x14;
1213+
opus_val32 tmp;
1214+
int flip = x<0;
1215+
x = abs(x);
1216+
x14 = x>>15;
1217+
tmp = (762*x14>>14)-3308;
1218+
tmp = (tmp*x14>>14)+25726;
1219+
tmp = tmp*celt_sqrt(IMAX(0, (1<<30) - (x<<1)))>>16;
1220+
if (flip) tmp = 25736 - tmp;
1221+
return tmp;
1222+
}
1223+
#endif
1224+
1225+
/* Compute the LPC coefficients using a least-squares fit for both forward and backward prediction. */
1226+
static int tone_lpc(const opus_val16 *x, int len, int delay, opus_val32 *lpc) {
1227+
int i;
1228+
opus_val32 r00=0, r01=0, r11=0, r02=0, r12=0, r22=0;
1229+
opus_val32 edges;
1230+
opus_val32 num0, num1, den;
1231+
celt_assert(len > 2*delay);
1232+
/* Compute correlations as if using the forward prediction covariance method. */
1233+
for (i=0;i<len-2*delay;i++) {
1234+
r00 += MULT16_16(x[i],x[i]);
1235+
r01 += MULT16_16(x[i],x[i+delay]);
1236+
r02 += MULT16_16(x[i],x[i+2*delay]);
1237+
}
1238+
edges = 0;
1239+
for (i=0;i<delay;i++) edges += MULT16_16(x[len+i-2*delay],x[len+i-2*delay]) - MULT16_16(x[i],x[i]);
1240+
r11 = r00+edges;
1241+
edges = 0;
1242+
for (i=0;i<delay;i++) edges += MULT16_16(x[len+i-delay],x[len+i-delay]) - MULT16_16(x[i+delay],x[i+delay]);
1243+
r22 = r11+edges;
1244+
edges = 0;
1245+
for (i=0;i<delay;i++) edges += MULT16_16(x[len+i-2*delay],x[len+i-delay]) - MULT16_16(x[i],x[i+delay]);
1246+
r12 = r01+edges;
1247+
/* Reverse and sum to get the backward contribution. */
1248+
{
1249+
opus_val32 R00, R01, R11, R02, R12, R22;
1250+
R00 = r00 + r22;
1251+
R01 = r01 + r12;
1252+
R11 = 2*r11;
1253+
R02 = 2*r02;
1254+
R12 = r12 + r01;
1255+
R22 = r00 + r22;
1256+
r00 = R00;
1257+
r01 = R01;
1258+
r11 = R11;
1259+
r02 = R02;
1260+
r12 = R12;
1261+
r22 = R22;
1262+
}
1263+
/* Solve A*x=b, where A=[r00, r01; r01, r11] and b=[r02; r12]. */
1264+
den = MULT32_32_Q31(r00,r11) - MULT32_32_Q31(r01,r01);
1265+
#ifdef FIXED_POINT
1266+
if (den <= SHR32(MULT32_32_Q31(r00,r11), 10)) return 1;
1267+
#else
1268+
if (den < .001f*MULT32_32_Q31(r00,r11)) return 1;
1269+
#endif
1270+
num1 = MULT32_32_Q31(r02,r11) - MULT32_32_Q31(r01,r12);
1271+
if (num1 >= den) lpc[1] = QCONST32(1.f, 29);
1272+
else lpc[1] = frac_div32_q29(num1, den);
1273+
num0 = MULT32_32_Q31(r00,r12) - MULT32_32_Q31(r02,r01);
1274+
if (HALF32(num0) >= den) lpc[0] = QCONST32(1.999f, 29);
1275+
else lpc[0] = frac_div32_q29(num0, den);
1276+
/*printf("%f %f\n", lpc[0], lpc[1]);*/
1277+
return 0;
1278+
}
1279+
1280+
/* Detects pure of nearly pure tones so we can prevent them from causing problems with the encoder. */
1281+
static opus_val16 tone_detect(const celt_sig *in, const celt_sig *prefilter_mem, int CC, int N, int overlap, opus_val32 *toneishness, opus_int32 Fs) {
1282+
int i;
1283+
int delay = 1;
1284+
int fail;
1285+
opus_val32 lpc[2];
1286+
opus_val16 freq;
1287+
VARDECL(opus_val16, x);
1288+
ALLOC(x, N+overlap, opus_val16);
1289+
/* Shift by SIG_SHIFT+1 (+2 for stereo) to account for HF gain of the preemphasis filter. */
1290+
if (CC==2) {
1291+
for (i=0;i<N;i++) x[i+overlap] = PSHR32(ADD32(in[i], in[i+N+overlap]), SIG_SHIFT+2);
1292+
for (i=0;i<overlap;i++) x[i] = PSHR32(ADD32(prefilter_mem[COMBFILTER_MAXPERIOD-overlap+i], prefilter_mem[2*COMBFILTER_MAXPERIOD-overlap+i]), SIG_SHIFT+2);
1293+
} else {
1294+
for (i=0;i<N;i++) x[i+overlap] = PSHR32(in[i], SIG_SHIFT+1);
1295+
for (i=0;i<overlap;i++) x[i] = PSHR32(prefilter_mem[COMBFILTER_MAXPERIOD-overlap+i], SIG_SHIFT+1);
1296+
}
1297+
#ifdef FIXED_POINT
1298+
normalize_tone_input(x, N+overlap);
1299+
#endif
1300+
fail = tone_lpc(x, N+overlap, delay, lpc);
1301+
/* If our LPC filter resonates too close to DC, retry the analysis with down-sampling. */
1302+
while (delay <= Fs/3000 && (fail || (lpc[0] > QCONST32(1.f, 29) && lpc[1] < 0))) {
1303+
delay *= 2;
1304+
fail = tone_lpc(x, N+overlap, delay, lpc);
1305+
}
1306+
/* Check that our filter has complex roots. */
1307+
if (!fail && MULT32_32_Q31(lpc[0],lpc[0]) + MULT32_32_Q31(QCONST32(4.f, 29), lpc[1]) < 0) {
1308+
/* Squared radius of the poles. */
1309+
*toneishness = -lpc[1];
1310+
#ifdef FIXED_POINT
1311+
freq = acos_approx(lpc[0]>>1)/delay;
1312+
#else
1313+
freq = acos(.5f*lpc[0])/delay;
1314+
#endif
1315+
} else {
1316+
freq = -1;
1317+
*toneishness=0;
1318+
}
1319+
/*printf("%f %f %f %f\n", freq, lpc[0], lpc[1], *toneishness);*/
1320+
return freq;
1321+
}
11871322

11881323
static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem, int CC, int N,
1189-
int prefilter_tapset, int *pitch, opus_val16 *gain, int *qgain, int enabled, int nbAvailableBytes, AnalysisInfo *analysis)
1324+
int prefilter_tapset, int *pitch, opus_val16 *gain, int *qgain, int enabled, int nbAvailableBytes, AnalysisInfo *analysis, opus_val16 tone_freq, opus_val32 toneishness)
11901325
{
11911326
int c;
11921327
VARDECL(celt_sig, _pre);
@@ -1231,6 +1366,26 @@ static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem,
12311366
if (pitch_index > COMBFILTER_MAXPERIOD-2)
12321367
pitch_index = COMBFILTER_MAXPERIOD-2;
12331368
gain1 = MULT16_16_Q15(QCONST16(.7f,15),gain1);
1369+
/* If we detect that the signal is dominated by a single tone, don't rely on the standard pitch
1370+
estimator, as it can become unreliable. */
1371+
if (toneishness > QCONST32(.99f, 29)) {
1372+
/* If the pitch is too high for our post-filter, apply pitch doubling until
1373+
we can get something that fits (not ideal, but better than nothing). */
1374+
while (tone_freq >= QCONST16(0.39f, 13)) tone_freq/=2;
1375+
if (tone_freq > QCONST16(0.006148f, 13)) {
1376+
#ifdef FIXED_POINT
1377+
pitch_index = IMIN(51472/tone_freq, COMBFILTER_MAXPERIOD-2);
1378+
#else
1379+
pitch_index = IMIN((int)floor(.5+2.f*M_PI/tone_freq), COMBFILTER_MAXPERIOD-2);
1380+
#endif
1381+
} else {
1382+
/* If the pitch is too low, using a very high pitch will actually give us an improvement
1383+
due to the DC component of the filter that will be close to our tone. Again, not ideal,
1384+
but if we only have a single tone, it's better than nothing. */
1385+
pitch_index = COMBFILTER_MINPERIOD;
1386+
}
1387+
gain1 = QCONST16(.75f, 15);
1388+
}
12341389
/*printf("%d %d %f %f\n", pitch_change, pitch_index, gain1, st->analysis.tonality);*/
12351390
if (st->loss_rate>2)
12361391
gain1 = HALF32(gain1);
@@ -1499,6 +1654,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
14991654
int hybrid;
15001655
int weak_transient = 0;
15011656
int enable_tf_analysis;
1657+
opus_val16 tone_freq=-1;
1658+
opus_val32 toneishness=0;
15021659
VARDECL(opus_val16, surround_dynalloc);
15031660
ALLOC_STACK;
15041661

@@ -1683,7 +1840,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
16831840
} while (++c<CC);
16841841

16851842

1686-
1843+
tone_freq = tone_detect(in+overlap, prefilter_mem, 1, N, overlap, &toneishness, mode->Fs);
16871844
/* Find pitch period and gain */
16881845
{
16891846
int enabled;
@@ -1692,7 +1849,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
16921849
&& st->complexity >= 5;
16931850

16941851
prefilter_tapset = st->tapset_decision;
1695-
pf_on = run_prefilter(st, in, prefilter_mem, CC, N, prefilter_tapset, &pitch_index, &gain1, &qg, enabled, nbAvailableBytes, &st->analysis);
1852+
pf_on = run_prefilter(st, in, prefilter_mem, CC, N, prefilter_tapset, &pitch_index, &gain1, &qg, enabled, nbAvailableBytes, &st->analysis, tone_freq, toneishness);
16961853
if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && (!st->analysis.valid || st->analysis.tonality > .3)
16971854
&& (pitch_index > 1.26*st->prefilter_period || pitch_index < .79*st->prefilter_period))
16981855
pitch_change = 1;
@@ -1724,7 +1881,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
17241881
though (small SILK quantization offset value). */
17251882
int allow_weak_transients = hybrid && effectiveBytes<15 && st->silk_info.signalType != 2;
17261883
isTransient = transient_analysis(in, N+overlap, CC,
1727-
&tf_estimate, &tf_chan, allow_weak_transients, &weak_transient);
1884+
&tf_estimate, &tf_chan, allow_weak_transients, &weak_transient, tone_freq, toneishness);
17281885
}
17291886
if (LM>0 && ec_tell(enc)+3<=total_bits)
17301887
{

celt/mathops.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ unsigned isqrt32(opus_uint32 _val){
6767

6868
#ifdef FIXED_POINT
6969

70-
opus_val32 frac_div32(opus_val32 a, opus_val32 b)
70+
opus_val32 frac_div32_q29(opus_val32 a, opus_val32 b)
7171
{
7272
opus_val16 rcp;
7373
opus_val32 result, rem;
@@ -79,6 +79,11 @@ opus_val32 frac_div32(opus_val32 a, opus_val32 b)
7979
result = MULT16_32_Q15(rcp, a);
8080
rem = PSHR32(a,2)-MULT32_32_Q31(result, b);
8181
result = ADD32(result, SHL32(MULT16_32_Q15(rcp, rem),2));
82+
return result;
83+
}
84+
85+
opus_val32 frac_div32(opus_val32 a, opus_val32 b) {
86+
opus_val32 result = frac_div32_q29(a,b);
8287
if (result >= 536870912) /* 2^29 */
8388
return 2147483647; /* 2^31 - 1 */
8489
else if (result <= -536870912) /* -2^29 */

celt/mathops.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ static OPUS_INLINE opus_val32 celt_maxabs32(const opus_val32 *x, int len)
120120
#define celt_rcp(x) (1.f/(x))
121121
#define celt_div(a,b) ((a)/(b))
122122
#define frac_div32(a,b) ((float)(a)/(b))
123+
#define frac_div32_q29(a,b) frac_div32(a,b)
123124

124125
#ifdef FLOAT_APPROX
125126

@@ -254,6 +255,7 @@ opus_val32 celt_rcp(opus_val32 x);
254255

255256
#define celt_div(a,b) MULT32_32_Q31((opus_val32)(a),celt_rcp(b))
256257

258+
opus_val32 frac_div32_q29(opus_val32 a, opus_val32 b);
257259
opus_val32 frac_div32(opus_val32 a, opus_val32 b);
258260

259261
#define M1 32767

0 commit comments

Comments
 (0)