52
52
#include "vq.h"
53
53
54
54
55
+ #ifndef M_PI
56
+ #define M_PI 3.141592653
57
+ #endif
58
+
59
+
55
60
/** Encoder state
56
61
@brief Encoder state
57
62
*/
@@ -226,7 +231,7 @@ void opus_custom_encoder_destroy(CELTEncoder *st)
226
231
227
232
static int transient_analysis (const opus_val32 * OPUS_RESTRICT in , int len , int C ,
228
233
opus_val16 * tf_estimate , int * tf_chan , int allow_weak_transients ,
229
- int * weak_transient )
234
+ int * weak_transient , opus_val16 tone_freq , opus_val32 toneishness )
230
235
{
231
236
int i ;
232
237
VARDECL (opus_val16 , tmp );
@@ -399,6 +404,10 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
399
404
}
400
405
}
401
406
is_transient = mask_metric > 200 ;
407
+ /* Prevent the transient detector from confusing the partial cycle of a
408
+ very low frequency tone with a transient. */
409
+ if (toneishness > QCONST32 (.98f , 29 ) && tone_freq < QCONST16 (0.026f , 13 ))
410
+ is_transient = 0 ;
402
411
/* For low bitrates, define "weak transients" that need to be
403
412
handled differently to avoid partial collapse. */
404
413
if (allow_weak_transients && is_transient && mask_metric < 600 ) {
@@ -1184,9 +1193,135 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16
1184
1193
return maxDepth ;
1185
1194
}
1186
1195
1196
+ #ifdef FIXED_POINT
1197
+ void normalize_tone_input (opus_val16 * x , int len ) {
1198
+ opus_val32 ac0 = len ;
1199
+ int i ;
1200
+ int shift ;
1201
+ for (i = 0 ;i < len ;i ++ ) {
1202
+ ac0 = ADD32 (ac0 , SHR32 (MULT16_16 (x [i ], x [i ]), 10 ));
1203
+ }
1204
+ shift = 5 - (28 - celt_ilog2 (ac0 ))/2 ;
1205
+ if (shift > 0 ) {
1206
+ for (i = 0 ;i < len ;i ++ ) {
1207
+ x [i ] = PSHR32 (x [i ], shift );
1208
+ }
1209
+ }
1210
+ }
1211
+ int acos_approx (opus_val32 x ) {
1212
+ opus_val16 x14 ;
1213
+ opus_val32 tmp ;
1214
+ int flip = x < 0 ;
1215
+ x = abs (x );
1216
+ x14 = x >>15 ;
1217
+ tmp = (762 * x14 >>14 )- 3308 ;
1218
+ tmp = (tmp * x14 >>14 )+ 25726 ;
1219
+ tmp = tmp * celt_sqrt (IMAX (0 , (1 <<30 ) - (x <<1 )))>>16 ;
1220
+ if (flip ) tmp = 25736 - tmp ;
1221
+ return tmp ;
1222
+ }
1223
+ #endif
1224
+
1225
+ /* Compute the LPC coefficients using a least-squares fit for both forward and backward prediction. */
1226
+ static int tone_lpc (const opus_val16 * x , int len , int delay , opus_val32 * lpc ) {
1227
+ int i ;
1228
+ opus_val32 r00 = 0 , r01 = 0 , r11 = 0 , r02 = 0 , r12 = 0 , r22 = 0 ;
1229
+ opus_val32 edges ;
1230
+ opus_val32 num0 , num1 , den ;
1231
+ celt_assert (len > 2 * delay );
1232
+ /* Compute correlations as if using the forward prediction covariance method. */
1233
+ for (i = 0 ;i < len - 2 * delay ;i ++ ) {
1234
+ r00 += MULT16_16 (x [i ],x [i ]);
1235
+ r01 += MULT16_16 (x [i ],x [i + delay ]);
1236
+ r02 += MULT16_16 (x [i ],x [i + 2 * delay ]);
1237
+ }
1238
+ edges = 0 ;
1239
+ for (i = 0 ;i < delay ;i ++ ) edges += MULT16_16 (x [len + i - 2 * delay ],x [len + i - 2 * delay ]) - MULT16_16 (x [i ],x [i ]);
1240
+ r11 = r00 + edges ;
1241
+ edges = 0 ;
1242
+ for (i = 0 ;i < delay ;i ++ ) edges += MULT16_16 (x [len + i - delay ],x [len + i - delay ]) - MULT16_16 (x [i + delay ],x [i + delay ]);
1243
+ r22 = r11 + edges ;
1244
+ edges = 0 ;
1245
+ for (i = 0 ;i < delay ;i ++ ) edges += MULT16_16 (x [len + i - 2 * delay ],x [len + i - delay ]) - MULT16_16 (x [i ],x [i + delay ]);
1246
+ r12 = r01 + edges ;
1247
+ /* Reverse and sum to get the backward contribution. */
1248
+ {
1249
+ opus_val32 R00 , R01 , R11 , R02 , R12 , R22 ;
1250
+ R00 = r00 + r22 ;
1251
+ R01 = r01 + r12 ;
1252
+ R11 = 2 * r11 ;
1253
+ R02 = 2 * r02 ;
1254
+ R12 = r12 + r01 ;
1255
+ R22 = r00 + r22 ;
1256
+ r00 = R00 ;
1257
+ r01 = R01 ;
1258
+ r11 = R11 ;
1259
+ r02 = R02 ;
1260
+ r12 = R12 ;
1261
+ r22 = R22 ;
1262
+ }
1263
+ /* Solve A*x=b, where A=[r00, r01; r01, r11] and b=[r02; r12]. */
1264
+ den = MULT32_32_Q31 (r00 ,r11 ) - MULT32_32_Q31 (r01 ,r01 );
1265
+ #ifdef FIXED_POINT
1266
+ if (den <= SHR32 (MULT32_32_Q31 (r00 ,r11 ), 10 )) return 1 ;
1267
+ #else
1268
+ if (den < .001f * MULT32_32_Q31 (r00 ,r11 )) return 1 ;
1269
+ #endif
1270
+ num1 = MULT32_32_Q31 (r02 ,r11 ) - MULT32_32_Q31 (r01 ,r12 );
1271
+ if (num1 >= den ) lpc [1 ] = QCONST32 (1.f , 29 );
1272
+ else lpc [1 ] = frac_div32_q29 (num1 , den );
1273
+ num0 = MULT32_32_Q31 (r00 ,r12 ) - MULT32_32_Q31 (r02 ,r01 );
1274
+ if (HALF32 (num0 ) >= den ) lpc [0 ] = QCONST32 (1.999f , 29 );
1275
+ else lpc [0 ] = frac_div32_q29 (num0 , den );
1276
+ /*printf("%f %f\n", lpc[0], lpc[1]);*/
1277
+ return 0 ;
1278
+ }
1279
+
1280
+ /* Detects pure of nearly pure tones so we can prevent them from causing problems with the encoder. */
1281
+ static opus_val16 tone_detect (const celt_sig * in , const celt_sig * prefilter_mem , int CC , int N , int overlap , opus_val32 * toneishness , opus_int32 Fs ) {
1282
+ int i ;
1283
+ int delay = 1 ;
1284
+ int fail ;
1285
+ opus_val32 lpc [2 ];
1286
+ opus_val16 freq ;
1287
+ VARDECL (opus_val16 , x );
1288
+ ALLOC (x , N + overlap , opus_val16 );
1289
+ /* Shift by SIG_SHIFT+1 (+2 for stereo) to account for HF gain of the preemphasis filter. */
1290
+ if (CC == 2 ) {
1291
+ for (i = 0 ;i < N ;i ++ ) x [i + overlap ] = PSHR32 (ADD32 (in [i ], in [i + N + overlap ]), SIG_SHIFT + 2 );
1292
+ for (i = 0 ;i < overlap ;i ++ ) x [i ] = PSHR32 (ADD32 (prefilter_mem [COMBFILTER_MAXPERIOD - overlap + i ], prefilter_mem [2 * COMBFILTER_MAXPERIOD - overlap + i ]), SIG_SHIFT + 2 );
1293
+ } else {
1294
+ for (i = 0 ;i < N ;i ++ ) x [i + overlap ] = PSHR32 (in [i ], SIG_SHIFT + 1 );
1295
+ for (i = 0 ;i < overlap ;i ++ ) x [i ] = PSHR32 (prefilter_mem [COMBFILTER_MAXPERIOD - overlap + i ], SIG_SHIFT + 1 );
1296
+ }
1297
+ #ifdef FIXED_POINT
1298
+ normalize_tone_input (x , N + overlap );
1299
+ #endif
1300
+ fail = tone_lpc (x , N + overlap , delay , lpc );
1301
+ /* If our LPC filter resonates too close to DC, retry the analysis with down-sampling. */
1302
+ while (delay <= Fs /3000 && (fail || (lpc [0 ] > QCONST32 (1.f , 29 ) && lpc [1 ] < 0 ))) {
1303
+ delay *= 2 ;
1304
+ fail = tone_lpc (x , N + overlap , delay , lpc );
1305
+ }
1306
+ /* Check that our filter has complex roots. */
1307
+ if (!fail && MULT32_32_Q31 (lpc [0 ],lpc [0 ]) + MULT32_32_Q31 (QCONST32 (4.f , 29 ), lpc [1 ]) < 0 ) {
1308
+ /* Squared radius of the poles. */
1309
+ * toneishness = - lpc [1 ];
1310
+ #ifdef FIXED_POINT
1311
+ freq = acos_approx (lpc [0 ]>>1 )/delay ;
1312
+ #else
1313
+ freq = acos (.5f * lpc [0 ])/delay ;
1314
+ #endif
1315
+ } else {
1316
+ freq = -1 ;
1317
+ * toneishness = 0 ;
1318
+ }
1319
+ /*printf("%f %f %f %f\n", freq, lpc[0], lpc[1], *toneishness);*/
1320
+ return freq ;
1321
+ }
1187
1322
1188
1323
static int run_prefilter (CELTEncoder * st , celt_sig * in , celt_sig * prefilter_mem , int CC , int N ,
1189
- int prefilter_tapset , int * pitch , opus_val16 * gain , int * qgain , int enabled , int nbAvailableBytes , AnalysisInfo * analysis )
1324
+ int prefilter_tapset , int * pitch , opus_val16 * gain , int * qgain , int enabled , int nbAvailableBytes , AnalysisInfo * analysis , opus_val16 tone_freq , opus_val32 toneishness )
1190
1325
{
1191
1326
int c ;
1192
1327
VARDECL (celt_sig , _pre );
@@ -1231,6 +1366,26 @@ static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem,
1231
1366
if (pitch_index > COMBFILTER_MAXPERIOD - 2 )
1232
1367
pitch_index = COMBFILTER_MAXPERIOD - 2 ;
1233
1368
gain1 = MULT16_16_Q15 (QCONST16 (.7f ,15 ),gain1 );
1369
+ /* If we detect that the signal is dominated by a single tone, don't rely on the standard pitch
1370
+ estimator, as it can become unreliable. */
1371
+ if (toneishness > QCONST32 (.99f , 29 )) {
1372
+ /* If the pitch is too high for our post-filter, apply pitch doubling until
1373
+ we can get something that fits (not ideal, but better than nothing). */
1374
+ while (tone_freq >= QCONST16 (0.39f , 13 )) tone_freq /=2 ;
1375
+ if (tone_freq > QCONST16 (0.006148f , 13 )) {
1376
+ #ifdef FIXED_POINT
1377
+ pitch_index = IMIN (51472 /tone_freq , COMBFILTER_MAXPERIOD - 2 );
1378
+ #else
1379
+ pitch_index = IMIN ((int )floor (.5 + 2.f * M_PI /tone_freq ), COMBFILTER_MAXPERIOD - 2 );
1380
+ #endif
1381
+ } else {
1382
+ /* If the pitch is too low, using a very high pitch will actually give us an improvement
1383
+ due to the DC component of the filter that will be close to our tone. Again, not ideal,
1384
+ but if we only have a single tone, it's better than nothing. */
1385
+ pitch_index = COMBFILTER_MINPERIOD ;
1386
+ }
1387
+ gain1 = QCONST16 (.75f , 15 );
1388
+ }
1234
1389
/*printf("%d %d %f %f\n", pitch_change, pitch_index, gain1, st->analysis.tonality);*/
1235
1390
if (st -> loss_rate > 2 )
1236
1391
gain1 = HALF32 (gain1 );
@@ -1499,6 +1654,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
1499
1654
int hybrid ;
1500
1655
int weak_transient = 0 ;
1501
1656
int enable_tf_analysis ;
1657
+ opus_val16 tone_freq = -1 ;
1658
+ opus_val32 toneishness = 0 ;
1502
1659
VARDECL (opus_val16 , surround_dynalloc );
1503
1660
ALLOC_STACK ;
1504
1661
@@ -1683,7 +1840,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
1683
1840
} while (++ c < CC );
1684
1841
1685
1842
1686
-
1843
+ tone_freq = tone_detect ( in + overlap , prefilter_mem , 1 , N , overlap , & toneishness , mode -> Fs );
1687
1844
/* Find pitch period and gain */
1688
1845
{
1689
1846
int enabled ;
@@ -1692,7 +1849,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
1692
1849
&& st -> complexity >= 5 ;
1693
1850
1694
1851
prefilter_tapset = st -> tapset_decision ;
1695
- pf_on = run_prefilter (st , in , prefilter_mem , CC , N , prefilter_tapset , & pitch_index , & gain1 , & qg , enabled , nbAvailableBytes , & st -> analysis );
1852
+ pf_on = run_prefilter (st , in , prefilter_mem , CC , N , prefilter_tapset , & pitch_index , & gain1 , & qg , enabled , nbAvailableBytes , & st -> analysis , tone_freq , toneishness );
1696
1853
if ((gain1 > QCONST16 (.4f ,15 ) || st -> prefilter_gain > QCONST16 (.4f ,15 )) && (!st -> analysis .valid || st -> analysis .tonality > .3 )
1697
1854
&& (pitch_index > 1.26 * st -> prefilter_period || pitch_index < .79 * st -> prefilter_period ))
1698
1855
pitch_change = 1 ;
@@ -1724,7 +1881,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
1724
1881
though (small SILK quantization offset value). */
1725
1882
int allow_weak_transients = hybrid && effectiveBytes < 15 && st -> silk_info .signalType != 2 ;
1726
1883
isTransient = transient_analysis (in , N + overlap , CC ,
1727
- & tf_estimate , & tf_chan , allow_weak_transients , & weak_transient );
1884
+ & tf_estimate , & tf_chan , allow_weak_transients , & weak_transient , tone_freq , toneishness );
1728
1885
}
1729
1886
if (LM > 0 && ec_tell (enc )+ 3 <=total_bits )
1730
1887
{
0 commit comments