From 3533472690075104eb59f275e80359ecc91fb2aa Mon Sep 17 00:00:00 2001 From: Tom Early Date: Mon, 29 Nov 2021 05:50:23 -0700 Subject: [PATCH] added codec2 from mvoice --- codec2/codebooks.cpp | 964 +++++++++++++++++++++ codec2/codec2.cpp | 1732 ++++++++++++++++++++++++++++++++++++++ codec2/codec2.h | 98 +++ codec2/codec2_internal.h | 67 ++ codec2/defines.h | 127 +++ codec2/kiss_fft.cpp | 435 ++++++++++ codec2/kiss_fft.h | 35 + codec2/lpc.cpp | 311 +++++++ codec2/lpc.h | 47 ++ codec2/nlp.cpp | 520 ++++++++++++ codec2/nlp.h | 87 ++ codec2/pack.cpp | 139 +++ codec2/qbase.cpp | 247 ++++++ codec2/qbase.h | 39 + codec2/quantise.cpp | 897 ++++++++++++++++++++ codec2/quantise.h | 68 ++ 16 files changed, 5813 insertions(+) create mode 100644 codec2/codebooks.cpp create mode 100644 codec2/codec2.cpp create mode 100644 codec2/codec2.h create mode 100644 codec2/codec2_internal.h create mode 100644 codec2/defines.h create mode 100644 codec2/kiss_fft.cpp create mode 100644 codec2/kiss_fft.h create mode 100644 codec2/lpc.cpp create mode 100644 codec2/lpc.h create mode 100644 codec2/nlp.cpp create mode 100644 codec2/nlp.h create mode 100644 codec2/pack.cpp create mode 100644 codec2/qbase.cpp create mode 100644 codec2/qbase.h create mode 100644 codec2/quantise.cpp create mode 100644 codec2/quantise.h diff --git a/codec2/codebooks.cpp b/codec2/codebooks.cpp new file mode 100644 index 0000000..bf319ce --- /dev/null +++ b/codec2/codebooks.cpp @@ -0,0 +1,964 @@ +/* + * This intermediary file and the files that used to create it are under + * The LGPL. See the file COPYING. + */ + +#include "defines.h" + +/* codebook/lsp1.txt */ +static float codes00[] = +{ + 225, + 250, + 275, + 300, + 325, + 350, + 375, + 400, + 425, + 450, + 475, + 500, + 525, + 550, + 575, + 600 +}; +/* codebook/lsp2.txt */ +static float codes01[] = +{ + 325, + 350, + 375, + 400, + 425, + 450, + 475, + 500, + 525, + 550, + 575, + 600, + 625, + 650, + 675, + 700 +}; +/* codebook/lsp3.txt */ +static float codes02[] = +{ + 500, + 550, + 600, + 650, + 700, + 750, + 800, + 850, + 900, + 950, + 1000, + 1050, + 1100, + 1150, + 1200, + 1250 +}; +/* codebook/lsp4.txt */ +static float codes03[] = +{ + 700, + 800, + 900, + 1000, + 1100, + 1200, + 1300, + 1400, + 1500, + 1600, + 1700, + 1800, + 1900, + 2000, + 2100, + 2200 +}; +/* codebook/lsp5.txt */ +static float codes04[] = +{ + 950, + 1050, + 1150, + 1250, + 1350, + 1450, + 1550, + 1650, + 1750, + 1850, + 1950, + 2050, + 2150, + 2250, + 2350, + 2450 +}; +/* codebook/lsp6.txt */ +static float codes05[] = +{ + 1100, + 1200, + 1300, + 1400, + 1500, + 1600, + 1700, + 1800, + 1900, + 2000, + 2100, + 2200, + 2300, + 2400, + 2500, + 2600 +}; +/* codebook/lsp7.txt */ +static float codes06[] = +{ + 1500, + 1600, + 1700, + 1800, + 1900, + 2000, + 2100, + 2200, + 2300, + 2400, + 2500, + 2600, + 2700, + 2800, + 2900, + 3000 +}; +/* codebook/lsp8.txt */ +static float codes07[] = +{ + 2300, + 2400, + 2500, + 2600, + 2700, + 2800, + 2900, + 3000 +}; +/* codebook/lsp9.txt */ +static float codes08[] = +{ + 2500, + 2600, + 2700, + 2800, + 2900, + 3000, + 3100, + 3200 +}; +/* codebook/lsp10.txt */ +static float codes09[] = +{ + 2900, + 3100, + 3300, + 3500 +}; + +const struct lsp_codebook lsp_cb[] = +{ + /* codebook/lsp1.txt */ + { + 1, + 4, + 16, + codes00 + }, + /* codebook/lsp2.txt */ + { + 1, + 4, + 16, + codes01 + }, + /* codebook/lsp3.txt */ + { + 1, + 4, + 16, + codes02 + }, + /* codebook/lsp4.txt */ + { + 1, + 4, + 16, + codes03 + }, + /* codebook/lsp5.txt */ + { + 1, + 4, + 16, + codes04 + }, + /* codebook/lsp6.txt */ + { + 1, + 4, + 16, + codes05 + }, + /* codebook/lsp7.txt */ + { + 1, + 4, + 16, + codes06 + }, + /* codebook/lsp8.txt */ + { + 1, + 3, + 8, + codes07 + }, + /* codebook/lsp9.txt */ + { + 1, + 3, + 8, + codes08 + }, + /* codebook/lsp10.txt */ + { + 1, + 2, + 4, + codes09 + }, + { 0, 0, 0, 0 } +}; + +/* codebook/dlsp1.txt */ +static float codes10[] = +{ + 25, + 50, + 75, + 100, + 125, + 150, + 175, + 200, + 225, + 250, + 275, + 300, + 325, + 350, + 375, + 400, + 425, + 450, + 475, + 500, + 525, + 550, + 575, + 600, + 625, + 650, + 675, + 700, + 725, + 750, + 775, + 800 +}; +/* codebook/dlsp2.txt */ +static float codes11[] = +{ + 25, + 50, + 75, + 100, + 125, + 150, + 175, + 200, + 225, + 250, + 275, + 300, + 325, + 350, + 375, + 400, + 425, + 450, + 475, + 500, + 525, + 550, + 575, + 600, + 625, + 650, + 675, + 700, + 725, + 750, + 775, + 800 +}; +/* codebook/dlsp3.txt */ +static float codes12[] = +{ + 25, + 50, + 75, + 100, + 125, + 150, + 175, + 200, + 225, + 250, + 275, + 300, + 325, + 350, + 375, + 400, + 425, + 450, + 475, + 500, + 525, + 550, + 575, + 600, + 625, + 650, + 675, + 700, + 725, + 750, + 775, + 800 +}; +/* codebook/dlsp4.txt */ +static float codes13[] = +{ + 25, + 50, + 75, + 100, + 125, + 150, + 175, + 200, + 250, + 300, + 350, + 400, + 450, + 500, + 550, + 600, + 650, + 700, + 750, + 800, + 850, + 900, + 950, + 1000, + 1050, + 1100, + 1150, + 1200, + 1250, + 1300, + 1350, + 1400 +}; +/* codebook/dlsp5.txt */ +static float codes14[] = +{ + 25, + 50, + 75, + 100, + 125, + 150, + 175, + 200, + 250, + 300, + 350, + 400, + 450, + 500, + 550, + 600, + 650, + 700, + 750, + 800, + 850, + 900, + 950, + 1000, + 1050, + 1100, + 1150, + 1200, + 1250, + 1300, + 1350, + 1400 +}; +/* codebook/dlsp6.txt */ +static float codes15[] = +{ + 25, + 50, + 75, + 100, + 125, + 150, + 175, + 200, + 250, + 300, + 350, + 400, + 450, + 500, + 550, + 600, + 650, + 700, + 750, + 800, + 850, + 900, + 950, + 1000, + 1050, + 1100, + 1150, + 1200, + 1250, + 1300, + 1350, + 1400 +}; +/* codebook/dlsp7.txt */ +static float codes16[] = +{ + 25, + 50, + 75, + 100, + 125, + 150, + 175, + 200, + 225, + 250, + 275, + 300, + 325, + 350, + 375, + 400, + 425, + 450, + 475, + 500, + 525, + 550, + 575, + 600, + 625, + 650, + 675, + 700, + 725, + 750, + 775, + 800 +}; +/* codebook/dlsp8.txt */ +static float codes17[] = +{ + 25, + 50, + 75, + 100, + 125, + 150, + 175, + 200, + 225, + 250, + 275, + 300, + 325, + 350, + 375, + 400, + 425, + 450, + 475, + 500, + 525, + 550, + 575, + 600, + 625, + 650, + 675, + 700, + 725, + 750, + 775, + 800 +}; +/* codebook/dlsp9.txt */ +static float codes18[] = +{ + 25, + 50, + 75, + 100, + 125, + 150, + 175, + 200, + 225, + 250, + 275, + 300, + 325, + 350, + 375, + 400, + 425, + 450, + 475, + 500, + 525, + 550, + 575, + 600, + 625, + 650, + 675, + 700, + 725, + 750, + 775, + 800 +}; +/* codebook/dlsp10.txt */ +static float codes19[] = +{ + 25, + 50, + 75, + 100, + 125, + 150, + 175, + 200, + 225, + 250, + 275, + 300, + 325, + 350, + 375, + 400, + 425, + 450, + 475, + 500, + 525, + 550, + 575, + 600, + 625, + 650, + 675, + 700, + 725, + 750, + 775, + 800 +}; + +const struct lsp_codebook lsp_cbd[] = +{ + /* codebook/dlsp1.txt */ + { + 1, + 5, + 32, + codes10 + }, + /* codebook/dlsp2.txt */ + { + 1, + 5, + 32, + codes11 + }, + /* codebook/dlsp3.txt */ + { + 1, + 5, + 32, + codes12 + }, + /* codebook/dlsp4.txt */ + { + 1, + 5, + 32, + codes13 + }, + /* codebook/dlsp5.txt */ + { + 1, + 5, + 32, + codes14 + }, + /* codebook/dlsp6.txt */ + { + 1, + 5, + 32, + codes15 + }, + /* codebook/dlsp7.txt */ + { + 1, + 5, + 32, + codes16 + }, + /* codebook/dlsp8.txt */ + { + 1, + 5, + 32, + codes17 + }, + /* codebook/dlsp9.txt */ + { + 1, + 5, + 32, + codes18 + }, + /* codebook/dlsp10.txt */ + { + 1, + 5, + 32, + codes19 + }, + { 0, 0, 0, 0 } +}; + + +/* codebook/gecb.txt */ +static float codes30[] = +{ + 2.71, 12.0184, + 0.04675, -2.73881, + 0.120993, 8.38895, + -1.58028, -0.892307, + 1.19307, -1.91561, + 0.187101, -3.27679, + 0.332251, -7.66455, + -1.47944, 31.2461, + 1.52761, 27.7095, + -0.524379, 5.25012, + 0.55333, 7.4388, + -0.843451, -1.95299, + 2.26389, 8.61029, + 0.143143, 2.36549, + 0.616506, 1.28427, + -1.71133, 22.0967, + 1.00813, 17.3965, + -0.106718, 1.41891, + -0.136246, 14.2736, + -1.70909, -20.5319, + 1.65787, -3.39107, + 0.138049, -4.95785, + 0.536729, -1.94375, + 0.196307, 36.8519, + 1.27248, 22.5565, + -0.670219, -1.90604, + 0.382092, 6.40113, + -0.756911, -4.90102, + 1.82931, 4.6138, + 0.318794, 0.73683, + 0.612815, -2.07505, + -0.410151, 24.7871, + 1.77602, 13.1909, + 0.106457, -0.104492, + 0.192206, 10.1838, + -1.82442, -7.71565, + 0.931346, 4.34835, + 0.308813, -4.086, + 0.397143, -11.8089, + -0.048715, 41.2273, + 0.877342, 35.8503, + -0.759794, 0.476634, + 0.978593, 7.67467, + -1.19506, 3.03883, + 2.63989, -3.41106, + 0.191127, 3.60351, + 0.402932, 1.0843, + -2.15202, 18.1076, + 1.5468, 8.32271, + -0.143089, -4.07592, + -0.150142, 5.86674, + -1.40844, -3.2507, + 1.56615, -10.4132, + 0.178171, -10.2267, + 0.362164, -0.028556, + -0.070125, 24.3907, + 0.594752, 17.4828, + -0.28698, -6.90407, + 0.464818, 10.2055, + -1.00684, -14.3572, + 2.32957, -3.69161, + 0.335745, 2.40714, + 1.01966, -3.15565, + -1.25945, 7.9919, + 2.38369, 19.6806, + -0.094947, -2.41374, + 0.20933, 6.66477, + -2.22103, 1.37986, + 1.29239, 2.04633, + 0.243626, -0.890741, + 0.428773, -7.19366, + -1.11374, 41.3414, + 2.6098, 31.1405, + -0.446468, 2.53419, + 0.490104, 4.62757, + -1.11723, -3.24174, + 1.79156, 8.41493, + 0.156012, 0.183336, + 0.532447, 3.15455, + -0.764484, 18.514, + 0.952395, 11.7713, + -0.332567, 0.346987, + 0.202165, 14.7168, + -2.12924, -15.559, + 1.35358, -1.92679, + -0.010963, -16.3364, + 0.399053, -2.79057, + 0.750657, 31.1483, + 0.655743, 24.4819, + -0.45321, -0.735879, + 0.2869, 6.5467, + -0.715673, -12.3578, + 1.54849, 3.87217, + 0.271874, 0.802339, + 0.502073, -4.85485, + -0.497037, 17.7619, + 1.19116, 13.9544, + 0.01563, 1.33157, + 0.341867, 8.93537, + -2.31601, -5.39506, + 0.75861, 1.9645, + 0.24132, -3.23769, + 0.267151, -11.2344, + -0.273126, 32.6248, + 1.75352, 40.432, + -0.784011, 3.04576, + 0.705987, 5.66118, + -1.3864, 1.35356, + 2.37646, 1.67485, + 0.242973, 4.73218, + 0.491227, 0.354061, + -1.60676, 8.65895, + 1.16711, 5.9871, + -0.137601, -12.0417, + -0.251375, 10.3972, + -1.43151, -8.90411, + 0.98828, -13.209, + 0.261484, -6.35497, + 0.395932, -0.702529, + 0.283704, 26.8996, + 0.420959, 15.4418, + -0.355804, -13.7278, + 0.527372, 12.3985, + -1.16956, -15.9985, + 1.90669, -5.81605, + 0.354492, 3.85157, + 0.82576, -4.16264, + -0.49019, 13.0572, + 2.25577, 13.5264, + -0.004956, -3.23713, + 0.026709, 7.86645, + -1.81037, -0.451183, + 1.08383, -0.18362, + 0.135836, -2.26658, + 0.375812, -5.51225, + -1.96644, 38.6829, + 1.97799, 24.5655, + -0.704656, 6.35881, + 0.480786, 7.05175, + -0.976417, -2.42273, + 2.50215, 6.75935, + 0.083588, 3.2588, + 0.543629, 0.910013, + -1.23196, 23.0915, + 0.785492, 14.807, + -0.213554, 1.688, + 0.004748, 18.1718, + -1.54719, -16.1168, + 1.50104, -3.28114, + 0.080133, -4.63472, + 0.476592, -2.18093, + 0.44247, 40.304, + 1.07277, 27.592, + -0.594738, -4.16681, + 0.42248, 7.61609, + -0.927521, -7.27441, + 1.99162, 1.29636, + 0.291307, 2.39878, + 0.721081, -1.95062, + -0.804256, 24.9295, + 1.64839, 19.1197, + 0.060852, -0.590639, + 0.266085, 9.10325, + -1.9574, -2.88461, + 1.11693, 2.6724, + 0.35458, -2.74854, + 0.330733, -14.1561, + -0.527851, 39.5756, + 0.991152, 43.195, + -0.589619, 1.26919, + 0.787401, 8.73071, + -1.0138, 1.02507, + 2.8254, 1.89538, + 0.24089, 2.74557, + 0.427195, 2.54446, + -1.95311, 12.244, + 1.44862, 12.0607, + -0.210492, -3.37906, + -0.056713, 10.204, + -1.65237, -5.10274, + 1.29475, -12.2708, + 0.111608, -8.67592, + 0.326634, -1.16763, + 0.021781, 31.1258, + 0.455335, 21.4684, + -0.37544, -3.37121, + 0.39362, 11.302, + -0.851456, -19.4149, + 2.10703, -2.22886, + 0.373233, 1.92406, + 0.884438, -1.72058, + -0.975127, 9.84013, + 2.0033, 17.3954, + -0.036915, -1.11137, + 0.148456, 5.39997, + -1.91441, 4.77382, + 1.44791, 0.537122, + 0.194979, -1.03818, + 0.495771, -9.95502, + -1.05899, 32.9471, + 2.01122, 32.4544, + -0.30965, 4.71911, + 0.436082, 4.63552, + -1.23711, -1.25428, + 2.02274, 9.42834, + 0.190342, 1.46077, + 0.479017, 2.48479, + -1.07848, 16.2217, + 1.20764, 9.65421, + -0.258087, -1.67236, + 0.071852, 13.416, + -1.87723, -16.072, + 1.28957, -4.87118, + 0.067713, -13.4427, + 0.435551, -4.1655, + 0.46614, 30.5895, + 0.904895, 21.598, + -0.518369, -2.53205, + 0.337363, 5.63726, + -0.554975, -17.4005, + 1.69188, 1.14574, + 0.227934, 0.889297, + 0.587303, -5.72973, + -0.262133, 18.6666, + 1.39505, 17.0029, + -0.01909, 4.30838, + 0.304235, 12.6699, + -2.07406, -6.46084, + 0.920546, 1.21296, + 0.284927, -1.78547, + 0.209724, -16.024, + -0.636067, 31.5768, + 1.34989, 34.6775, + -0.971625, 5.30086, + 0.590249, 4.44971, + -1.56787, 3.60239, + 2.1455, 4.51666, + 0.296022, 4.12017, + 0.445299, 0.868772, + -1.44193, 14.1284, + 1.35575, 6.0074, + -0.012814, -7.49657, + -0.43, 8.50012, + -1.20469, -7.11326, + 1.10102, -6.83682, + 0.196463, -6.234, + 0.436747, -1.12979, + 0.141052, 22.8549, + 0.290821, 18.8114, + -0.529536, -7.73251, + 0.63428, 10.7898, + -1.33472, -20.3258, + 1.81564, -1.90332, + 0.394778, 3.79758, + 0.732682, -8.18382, + -0.741244, 11.7683 +}; + +const struct lsp_codebook ge_cb[] = +{ + /* codebook/gecb.txt */ + { + 2, + 8, + 256, + codes30 + }, + { 0, 0, 0, 0 } +}; diff --git a/codec2/codec2.cpp b/codec2/codec2.cpp new file mode 100644 index 0000000..4774b0c --- /dev/null +++ b/codec2/codec2.cpp @@ -0,0 +1,1732 @@ +/*---------------------------------------------------------------------------*\ + + FILE........: codec2.c + AUTHOR......: David Rowe + DATE CREATED: 21/8/2010 + + Codec2 fully quantised encoder and decoder functions. If you want use + codec2, the codec2_xxx functions are for you. + +\*---------------------------------------------------------------------------*/ + +/* + Copyright (C) 2010 David Rowe + + All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License version 2.1, as + published by the free Software Foundation. This program is + distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, see . +*/ + +#include +#include +#include +#include +#include +#include + +#include "nlp.h" +#include "lpc.h" +#include "quantise.h" +#include "codec2.h" +#include "codec2_internal.h" + +#define HPF_BETA 0.125 +#define BPF_N 101 + +CKissFFT kiss; + +/*---------------------------------------------------------------------------* \ + + FUNCTION HEADERS + +\*---------------------------------------------------------------------------*/ + + + + +/*---------------------------------------------------------------------------*\ + + FUNCTIONS + +\*---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: codec2_create + AUTHOR......: David Rowe + DATE CREATED: 21/8/2010 + + Create and initialise an instance of the codec. Returns a pointer + to the codec states or NULL on failure. One set of states is + sufficient for a full duuplex codec (i.e. an encoder and decoder). + You don't need separate states for encoders and decoders. See + c2enc.c and c2dec.c for examples. + +\*---------------------------------------------------------------------------*/ + +CCodec2::CCodec2(bool is_3200) +{ + c2.mode = is_3200 ? 3200 : 1600; + + /* store constants in a few places for convenience */ + + c2.c2const = c2const_create(8000, N_S); + c2.Fs = c2.c2const.Fs; + int n_samp = c2.n_samp = c2.c2const.n_samp; + int m_pitch = c2.m_pitch = c2.c2const.m_pitch; + + c2.Pn.resize(2*n_samp); + c2.Sn_.resize(2*n_samp); + c2.w.resize(m_pitch); + c2.Sn.resize(m_pitch); + + for(int i=0; i Aw[FFT_ENC]; + + /* only need to zero these out due to (unused) snr calculation */ + + for(i=0; i<2; i++) + for(j=1; j<=MAX_AMP; j++) + model[i].A[j] = 0.0; + + /* unpack bits from channel ------------------------------------*/ + + /* this will partially fill the model params for the 2 x 10ms + frames */ + + model[0].voiced = qt.unpack(bits, &nbit, 1); + model[1].voiced = qt.unpack(bits, &nbit, 1); + + Wo_index = qt.unpack(bits, &nbit, WO_BITS); + model[1].Wo = qt.decode_Wo(&c2.c2const, Wo_index, WO_BITS); + model[1].L = PI/model[1].Wo; + + e_index = qt.unpack(bits, &nbit, E_BITS); + e[1] = qt.decode_energy(e_index, E_BITS); + + for(i=0; i Aw[FFT_ENC]; + + /* only need to zero these out due to (unused) snr calculation */ + + for(i=0; i<4; i++) + for(j=1; j<=MAX_AMP; j++) + model[i].A[j] = 0.0; + + /* unpack bits from channel ------------------------------------*/ + + /* this will partially fill the model params for the 4 x 10ms + frames */ + + model[0].voiced = qt.unpack(bits, &nbit, 1); + + model[1].voiced = qt.unpack(bits, &nbit, 1); + Wo_index = qt.unpack(bits, &nbit, WO_BITS); + model[1].Wo = qt.decode_Wo(&c2.c2const, Wo_index, WO_BITS); + model[1].L = PI/model[1].Wo; + + e_index = qt.unpack(bits, &nbit, E_BITS); + e[1] = qt.decode_energy(e_index, E_BITS); + + model[2].voiced = qt.unpack(bits, &nbit, 1); + + model[3].voiced = qt.unpack(bits, &nbit, 1); + Wo_index = qt.unpack(bits, &nbit, WO_BITS); + model[3].Wo = qt.decode_Wo(&c2.c2const, Wo_index, WO_BITS); + model[3].L = PI/model[3].Wo; + + e_index = qt.unpack(bits, &nbit, E_BITS); + e[3] = qt.decode_energy(e_index, E_BITS); + + for(i=0; i Aw[], float gain) +{ + int i; + + /* LPC based phase synthesis */ + std::complex H[MAX_AMP+1]; + sample_phase(model, H, Aw); + phase_synth_zero_order(c2.n_samp, model, &c2.ex_phase, H); + + postfilter(model, &c2.bg_est); + synthesise(c2.n_samp, &(c2.fftr_inv_cfg), c2.Sn_.data(), model, c2.Pn.data(), 1); + + for(i=0; i 32767.0) + speech[i] = 32767; + else if (c2.Sn_[i] < -32767.0) + speech[i] = -32767; + else + speech[i] = c2.Sn_[i]; + } + +} + + +/*---------------------------------------------------------------------------* \ + + FUNCTION....: analyse_one_frame() + AUTHOR......: David Rowe + DATE CREATED: 23/8/2010 + + Extract sinusoidal model parameters from 80 speech samples (10ms of + speech). + +\*---------------------------------------------------------------------------*/ + +void CCodec2::analyse_one_frame(MODEL *model, const short *speech) +{ + std::complex Sw[FFT_ENC]; + float pitch; + int i; + int n_samp = c2.n_samp; + int m_pitch = c2.m_pitch; + + /* Read input speech */ + + for(i=0; iWo = TWO_PI/pitch; + model->L = PI/model->Wo; + + /* estimate model parameters */ + two_stage_pitch_refinement(&c2.c2const, model, Sw); + + /* estimate phases when doing ML experiments */ + estimate_amplitudes(model, Sw, 0); + est_voicing_mbe(&c2.c2const, model, Sw, c2.W); +} + + +/*---------------------------------------------------------------------------* \ + + FUNCTION....: ear_protection() + AUTHOR......: David Rowe + DATE CREATED: Nov 7 2012 + + Limits output level to protect ears when there are bit errors or the input + is overdriven. This doesn't correct or mask bit errors, just reduces the + worst of their damage. + +\*---------------------------------------------------------------------------*/ + +void CCodec2::ear_protection(float in_out[], int n) +{ + float max_sample, over, gain; + int i; + + /* find maximum sample in frame */ + + max_sample = 0.0; + for(i=0; i max_sample) + max_sample = in_out[i]; + + /* determine how far above set point */ + + over = max_sample/30000.0; + + /* If we are x dB over set point we reduce level by 2x dB, this + attenuates major excursions in amplitude (likely to be caused + by bit errors) more than smaller ones */ + + if (over > 1.0) + { + gain = 1.0/(over*over); + for(i=0; i H[], + std::complex A[] /* LPC analysis filter in freq domain */ +) +{ + int m, b; + float r; + + r = TWO_PI/(FFT_ENC); + + /* Sample phase at harmonics */ + + for(m=1; m<=model->L; m++) + { + b = (int)(m*model->Wo/r + 0.5); + H[m] = std::conj(A[b]); + } +} + + +/*---------------------------------------------------------------------------*\ + + phase_synth_zero_order() + + Synthesises phases based on SNR and a rule based approach. No phase + parameters are required apart from the SNR (which can be reduced to a + 1 bit V/UV decision per frame). + + The phase of each harmonic is modelled as the phase of a synthesis + filter excited by an impulse. In many Codec 2 modes the synthesis + filter is a LPC filter. Unlike the first order model the position + of the impulse is not transmitted, so we create an excitation pulse + train using a rule based approach. + + Consider a pulse train with a pulse starting time n=0, with pulses + repeated at a rate of Wo, the fundamental frequency. A pulse train + in the time domain is equivalent to harmonics in the frequency + domain. We can make an excitation pulse train using a sum of + sinsusoids: + + for(m=1; m<=L; m++) + ex[n] = cos(m*Wo*n) + + Note: the Octave script ../octave/phase.m is an example of this if + you would like to try making a pulse train. + + The phase of each excitation harmonic is: + + arg(E[m]) = mWo + + where E[m] are the complex excitation (freq domain) samples, + arg(x), just returns the phase of a complex sample x. + + As we don't transmit the pulse position for this model, we need to + synthesise it. Now the excitation pulses occur at a rate of Wo. + This means the phase of the first harmonic advances by N_SAMP samples + over a synthesis frame of N_SAMP samples. For example if Wo is pi/20 + (200 Hz), then over a 10ms frame (N_SAMP=80 samples), the phase of the + first harmonic would advance (pi/20)*80 = 4*pi or two complete + cycles. + + We generate the excitation phase of the fundamental (first + harmonic): + + arg[E[1]] = Wo*N_SAMP; + + We then relate the phase of the m-th excitation harmonic to the + phase of the fundamental as: + + arg(E[m]) = m*arg(E[1]) + + This E[m] then gets passed through the LPC synthesis filter to + determine the final harmonic phase. + + Comparing to speech synthesised using original phases: + + - Through headphones speech synthesised with this model is not as + good. Through a loudspeaker it is very close to original phases. + + - If there are voicing errors, the speech can sound clicky or + staticy. If V speech is mistakenly declared UV, this model tends to + synthesise impulses or clicks, as there is usually very little shift or + dispersion through the LPC synthesis filter. + + - When combined with LPC amplitude modelling there is an additional + drop in quality. I am not sure why, theory is interformant energy + is raised making any phase errors more obvious. + + NOTES: + + 1/ This synthesis model is effectively the same as a simple LPC-10 + vocoders, and yet sounds much better. Why? Conventional wisdom + (AMBE, MELP) says mixed voicing is required for high quality + speech. + + 2/ I am pretty sure the Lincoln Lab sinusoidal coding guys (like xMBE + also from MIT) first described this zero phase model, I need to look + up the paper. + + 3/ Note that this approach could cause some discontinuities in + the phase at the edge of synthesis frames, as no attempt is made + to make sure that the phase tracks are continuous (the excitation + phases are continuous, but not the final phases after filtering + by the LPC spectra). Technically this is a bad thing. However + this may actually be a good thing, disturbing the phase tracks a + bit. More research needed, e.g. test a synthesis model that adds + a small delta-W to make phase tracks line up for voiced + harmonics. + +\*---------------------------------------------------------------------------*/ + +void CCodec2::phase_synth_zero_order( + int n_samp, + MODEL *model, + float *ex_phase, /* excitation phase of fundamental */ + std::complex H[] /* L synthesis filter freq domain samples */ + +) +{ + int m; + float new_phi; + std::complex Ex[MAX_AMP+1]; /* excitation samples */ + std::complex A_[MAX_AMP+1]; /* synthesised harmonic samples */ + + /* + Update excitation fundamental phase track, this sets the position + of each pitch pulse during voiced speech. After much experiment + I found that using just this frame's Wo improved quality for UV + sounds compared to interpolating two frames Wo like this: + + ex_phase[0] += (*prev_Wo+model->Wo)*N_SAMP/2; + */ + + ex_phase[0] += (model->Wo)*n_samp; + ex_phase[0] -= TWO_PI*floorf(ex_phase[0]/TWO_PI + 0.5); + + for(m=1; m<=model->L; m++) + { + + /* generate excitation */ + + if (model->voiced) + { + Ex[m] = std::polar(1.0f, ex_phase[0] * m); + } + else + { + + /* When a few samples were tested I found that LPC filter + phase is not needed in the unvoiced case, but no harm in + keeping it. + */ + float phi = TWO_PI*(float)codec2_rand()/CODEC2_RAND_MAX; + Ex[m] = std::polar(1.0f, phi); + } + + /* filter using LPC filter */ + + A_[m].real(H[m].real() * Ex[m].real() - H[m].imag() * Ex[m].imag()); + A_[m].imag(H[m].imag() * Ex[m].real() + H[m].real() * Ex[m].imag()); + + /* modify sinusoidal phase */ + + new_phi = atan2f(A_[m].imag(), A_[m].real()+1E-12); + model->phi[m] = new_phi; + } + +} + +/*---------------------------------------------------------------------------*\ + + postfilter() + + The post filter is designed to help with speech corrupted by + background noise. The zero phase model tends to make speech with + background noise sound "clicky". With high levels of background + noise the low level inter-formant parts of the spectrum will contain + noise rather than speech harmonics, so modelling them as voiced + (i.e. a continuous, non-random phase track) is inaccurate. + + Some codecs (like MBE) have a mixed voicing model that breaks the + spectrum into voiced and unvoiced regions. Several bits/frame + (5-12) are required to transmit the frequency selective voicing + information. Mixed excitation also requires accurate voicing + estimation (parameter estimators always break occasionally under + exceptional conditions). + + In our case we use a post filter approach which requires no + additional bits to be transmitted. The decoder measures the average + level of the background noise during unvoiced frames. If a harmonic + is less than this level it is made unvoiced by randomising it's + phases. + + This idea is rather experimental. Some potential problems that may + happen: + + 1/ If someone says "aaaaaaaahhhhhhhhh" will background estimator track + up to speech level? This would be a bad thing. + + 2/ If background noise suddenly dissapears from the source speech does + estimate drop quickly? What is noise suddenly re-appears? + + 3/ Background noise with a non-flat sepctrum. Current algorithm just + comsiders spectrum as a whole, but this could be broken up into + bands, each with their own estimator. + + 4/ Males and females with the same level of background noise. Check + performance the same. Changing Wo affects width of each band, may + affect bg energy estimates. + + 5/ Not sure what happens during long periods of voiced speech + e.g. "sshhhhhhh" + +\*---------------------------------------------------------------------------*/ + +#define BG_THRESH 40.0 // only consider low levels signals for bg_est +#define BG_BETA 0.1 // averaging filter constant +#define BG_MARGIN 6.0 // harmonics this far above BG noise are + // randomised. Helped make bg noise less + // spikey (impulsive) for mmt1, but speech was + // perhaps a little rougher. + +void CCodec2::postfilter( MODEL *model, float *bg_est ) +{ + int m, uv; + float e, thresh; + + /* determine average energy across spectrum */ + + e = 1E-12; + for(m=1; m<=model->L; m++) + e += model->A[m]*model->A[m]; + + assert(e > 0.0); + e = 10.0*log10f(e/model->L); + + /* If beneath threhold, update bg estimate. The idea + of the threshold is to prevent updating during high level + speech. */ + + if ((e < BG_THRESH) && !model->voiced) + *bg_est = *bg_est*(1.0 - BG_BETA) + e*BG_BETA; + + /* now mess with phases during voiced frames to make any harmonics + less then our background estimate unvoiced. + */ + + uv = 0; + thresh = exp10f((*bg_est + BG_MARGIN)/20.0); + if (model->voiced) + for(m=1; m<=model->L; m++) + if (model->A[m] < thresh) + { + model->phi[m] = (TWO_PI/CODEC2_RAND_MAX)*(float)codec2_rand(); + uv++; + } +} + +C2CONST CCodec2::c2const_create(int Fs, float framelength_s) +{ + C2CONST c2const; + + assert((Fs == 8000) || (Fs = 16000)); + c2const.Fs = Fs; + c2const.n_samp = round(Fs*framelength_s); + c2const.max_amp = floor(Fs*P_MAX_S/2); + c2const.p_min = floor(Fs*P_MIN_S); + c2const.p_max = floor(Fs*P_MAX_S); + c2const.m_pitch = floor(Fs*M_PITCH_S); + c2const.Wo_min = TWO_PI/c2const.p_max; + c2const.Wo_max = TWO_PI/c2const.p_min; + + if (Fs == 8000) + { + c2const.nw = 279; + } + else + { + c2const.nw = 511; /* actually a bit shorter in time but lets us maintain constant FFT size */ + } + + c2const.tw = Fs*TW_S; + + /* + fprintf(stderr, "max_amp: %d m_pitch: %d\n", c2const.n_samp, c2const.m_pitch); + fprintf(stderr, "p_min: %d p_max: %d\n", c2const.p_min, c2const.p_max); + fprintf(stderr, "Wo_min: %f Wo_max: %f\n", c2const.Wo_min, c2const.Wo_max); + fprintf(stderr, "nw: %d tw: %d\n", c2const.nw, c2const.tw); + */ + + return c2const; +} + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: make_analysis_window + AUTHOR......: David Rowe + DATE CREATED: 11/5/94 + + Init function that generates the time domain analysis window and it's DFT. + +\*---------------------------------------------------------------------------*/ + +void CCodec2::make_analysis_window(C2CONST *c2const, FFT_STATE *fft_fwd_cfg, float w[], float W[]) +{ + float m; + std::complex wshift[FFT_ENC]; + int i,j; + int m_pitch = c2const->m_pitch; + int nw = c2const->nw; + + /* + Generate Hamming window centered on M-sample pitch analysis window + + 0 M/2 M-1 + |-------------|-------------| + |-------|-------| + nw samples + + All our analysis/synthsis is centred on the M/2 sample. + */ + + m = 0.0; + for(i=0; i temp[FFT_ENC]; + + for(i=0; i(0.0f, 0.0f); + } + for(i=0; i Sw[], float Sn[], float w[]) +{ + int i; + int m_pitch = c2const->m_pitch; + int nw = c2const->nw; + + for(i=0; i(0.0f, 0.0f); + } + + /* Centre analysis window on time axis, we need to arrange input + to FFT this way to make FFT phases correct */ + + /* move 2nd half to start of FFT input vector */ + + for(i=0; i Sw[]) +{ + float pmin,pmax,pstep; /* pitch refinment minimum, maximum and step */ + + /* Coarse refinement */ + + pmax = TWO_PI/model->Wo + 5; + pmin = TWO_PI/model->Wo - 5; + pstep = 1.0; + hs_pitch_refinement(model, Sw, pmin, pmax, pstep); + + /* Fine refinement */ + + pmax = TWO_PI/model->Wo + 1; + pmin = TWO_PI/model->Wo - 1; + pstep = 0.25; + hs_pitch_refinement(model,Sw,pmin,pmax,pstep); + + /* Limit range */ + + if (model->Wo < TWO_PI/c2const->p_max) + model->Wo = TWO_PI/c2const->p_max; + if (model->Wo > TWO_PI/c2const->p_min) + model->Wo = TWO_PI/c2const->p_min; + + model->L = floorf(PI/model->Wo); + + /* trap occasional round off issues with floorf() */ + if (model->Wo*model->L >= 0.95*PI) + { + model->L--; + } + assert(model->Wo*model->L < PI); +} + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: hs_pitch_refinement + AUTHOR......: David Rowe + DATE CREATED: 27/5/94 + + Harmonic sum pitch refinement function. + + pmin pitch search range minimum + pmax pitch search range maximum + step pitch search step size + model current pitch estimate in model.Wo + + model refined pitch estimate in model.Wo + +\*---------------------------------------------------------------------------*/ + +void CCodec2::hs_pitch_refinement(MODEL *model, std::complex Sw[], float pmin, float pmax, float pstep) +{ + int m; /* loop variable */ + int b; /* bin for current harmonic centre */ + float E; /* energy for current pitch*/ + float Wo; /* current "test" fundamental freq. */ + float Wom; /* Wo that maximises E */ + float Em; /* mamimum energy */ + float r, one_on_r; /* number of rads/bin */ + float p; /* current pitch */ + + /* Initialisation */ + + model->L = PI/model->Wo; /* use initial pitch est. for L */ + Wom = model->Wo; + Em = 0.0; + r = TWO_PI/FFT_ENC; + one_on_r = 1.0/r; + + /* Determine harmonic sum for a range of Wo values */ + + for(p=pmin; p<=pmax; p+=pstep) + { + E = 0.0; + Wo = TWO_PI/p; + + /* Sum harmonic magnitudes */ + for(m=1; m<=model->L; m++) + { + b = (int)(m*Wo*one_on_r + 0.5); + E += Sw[b].real() * Sw[b].real() + Sw[b].imag() * Sw[b].imag(); + } + /* Compare to see if this is a maximum */ + + if (E > Em) + { + Em = E; + Wom = Wo; + } + } + + model->Wo = Wom; +} + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: estimate_amplitudes + AUTHOR......: David Rowe + DATE CREATED: 27/5/94 + + Estimates the complex amplitudes of the harmonics. + +\*---------------------------------------------------------------------------*/ + +void CCodec2::estimate_amplitudes(MODEL *model, std::complex Sw[], int est_phase) +{ + int i,m; /* loop variables */ + int am,bm; /* bounds of current harmonic */ + float den; /* denominator of amplitude expression */ + + float r = TWO_PI/FFT_ENC; + float one_on_r = 1.0/r; + + for(m=1; m<=model->L; m++) + { + /* Estimate ampltude of harmonic */ + + den = 0.0; + am = (int)((m - 0.5)*model->Wo*one_on_r + 0.5); + bm = (int)((m + 0.5)*model->Wo*one_on_r + 0.5); + + for(i=am; iA[m] = sqrtf(den); + + if (est_phase) + { + int b = (int)(m*model->Wo/r + 0.5); /* DFT bin of centre of current harmonic */ + + /* Estimate phase of harmonic, this is expensive in CPU for + embedded devicesso we make it an option */ + + model->phi[m] = atan2f(Sw[b].imag(), Sw[b].real()); + } + } +} + +/*---------------------------------------------------------------------------*\ + + est_voicing_mbe() + + Returns the error of the MBE cost function for a fiven F0. + + Note: I think a lot of the operations below can be simplified as + W[].imag = 0 and has been normalised such that den always equals 1. + +\*---------------------------------------------------------------------------*/ + +float CCodec2::est_voicing_mbe( C2CONST *c2const, MODEL *model, std::complex Sw[], float W[]) +{ + int l,al,bl,m; /* loop variables */ + std::complex Am; /* amplitude sample for this band */ + int offset; /* centers Hw[] about current harmonic */ + float den; /* denominator of Am expression */ + float error; /* accumulated error between original and synthesised */ + float Wo; + float sig, snr; + float elow, ehigh, eratio; + float sixty; + std::complex Ew(0, 0); + + int l_1000hz = model->L*1000.0/(c2const->Fs/2); + sig = 1E-4; + for(l=1; l<=l_1000hz; l++) + { + sig += model->A[l]*model->A[l]; + } + + Wo = model->Wo; + error = 1E-4; + + /* Just test across the harmonics in the first 1000 Hz */ + + for(l=1; l<=l_1000hz; l++) + { + Am = std::complex(0.0f, 0.0f); + den = 0.0; + al = ceilf((l - 0.5)*Wo*FFT_ENC/TWO_PI); + bl = ceilf((l + 0.5)*Wo*FFT_ENC/TWO_PI); + + /* Estimate amplitude of harmonic assuming harmonic is totally voiced */ + + offset = FFT_ENC/2 - l*Wo*FFT_ENC/TWO_PI + 0.5; + for(m=al; m V_THRESH) + model->voiced = 1; + else + model->voiced = 0; + + /* post processing, helps clean up some voicing errors ------------------*/ + + /* + Determine the ratio of low freqency to high frequency energy, + voiced speech tends to be dominated by low frequency energy, + unvoiced by high frequency. This measure can be used to + determine if we have made any gross errors. + */ + + int l_2000hz = model->L*2000.0/(c2const->Fs/2); + int l_4000hz = model->L*4000.0/(c2const->Fs/2); + elow = ehigh = 1E-4; + for(l=1; l<=l_2000hz; l++) + { + elow += model->A[l]*model->A[l]; + } + for(l=l_2000hz; l<=l_4000hz; l++) + { + ehigh += model->A[l]*model->A[l]; + } + eratio = 10.0*log10f(elow/ehigh); + + /* Look for Type 1 errors, strongly V speech that has been + accidentally declared UV */ + + if (model->voiced == 0) + if (eratio > 10.0) + model->voiced = 1; + + /* Look for Type 2 errors, strongly UV speech that has been + accidentally declared V */ + + if (model->voiced == 1) + { + if (eratio < -10.0) + model->voiced = 0; + + /* A common source of Type 2 errors is the pitch estimator + gives a low (50Hz) estimate for UV speech, which gives a + good match with noise due to the close harmoonic spacing. + These errors are much more common than people with 50Hz3 + pitch, so we have just a small eratio threshold. */ + + sixty = 60.0*TWO_PI/c2const->Fs; + if ((eratio < -4.0) && (model->Wo <= sixty)) + model->voiced = 0; + } + //printf(" v: %d snr: %f eratio: %3.2f %f\n",model->voiced,snr,eratio,dF0); + + return snr; +} + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: make_synthesis_window + AUTHOR......: David Rowe + DATE CREATED: 11/5/94 + + Init function that generates the trapezoidal (Parzen) sythesis window. + +\*---------------------------------------------------------------------------*/ + +void CCodec2::make_synthesis_window(C2CONST *c2const, float Pn[]) +{ + int i; + float win; + int n_samp = c2const->n_samp; + int tw = c2const->tw; + + /* Generate Parzen window in time domain */ + + win = 0.0; + for(i=0; i Sw_[FFT_DEC/2+1]; /* DFT of synthesised signal */ + float sw_[FFT_DEC]; /* synthesised signal */ + + if (shift) + { + /* Update memories */ + for(i=0; iL; l++) + { + b = (int)(l*model->Wo*FFT_DEC/TWO_PI + 0.5); + if (b > ((FFT_DEC/2)-1)) + { + b = (FFT_DEC/2)-1; + } + Sw_[b] = std::polar(model->A[l], model->phi[l]); + } + + /* Perform inverse DFT */ + + kiss.fftri(*fftr_inv_cfg, Sw_,sw_); + + /* Overlap add to previous samples */ + + for(i=0; ivoiced && !prev->voiced && !next->voiced) + { + interp->voiced = 0; + } + + /* Wo depends on voicing of this and adjacent frames */ + + if (interp->voiced) + { + if (prev->voiced && next->voiced) + interp->Wo = (1.0 - weight)*prev->Wo + weight*next->Wo; + if (!prev->voiced && next->voiced) + interp->Wo = next->Wo; + if (prev->voiced && !next->voiced) + interp->Wo = prev->Wo; + } + else + { + interp->Wo = Wo_min; + } + interp->L = PI/interp->Wo; +} + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: interp_energy() + AUTHOR......: David Rowe + DATE CREATED: 22 May 2012 + + Interpolates centre 10ms sample of energy given two samples 20ms + apart. + +\*---------------------------------------------------------------------------*/ + +float CCodec2::interp_energy(float prev_e, float next_e) +{ + //return powf(10.0, (log10f(prev_e) + log10f(next_e))/2.0); + return sqrtf(prev_e * next_e); //looks better is math. identical and faster math +} + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: interpolate_lsp_ver2() + AUTHOR......: David Rowe + DATE CREATED: 22 May 2012 + + Weighted interpolation of LSPs. + +\*---------------------------------------------------------------------------*/ + +void CCodec2::interpolate_lsp_ver2(float interp[], float prev[], float next[], float weight, int order) +{ + int i; + + for(i=0; i. +*/ + +#ifndef __CODEC2__ +#define __CODEC2__ + +#include + +#include "codec2_internal.h" +#include "defines.h" +#include "kiss_fft.h" +#include "nlp.h" +#include "quantise.h" + +#define CODEC2_MODE_3200 0 +#define CODEC2_MODE_1600 2 + +#ifndef CODEC2_MODE_EN_DEFAULT +#define CODEC2_MODE_EN_DEFAULT 1 +#endif + +#define CODEC2_RAND_MAX 32767 + +class CCodec2 +{ +public: + CCodec2(bool is_3200); + ~CCodec2(); + void codec2_encode(unsigned char *bits, const short *speech_in); + void codec2_decode(short *speech_out, const unsigned char *bits); + int codec2_samples_per_frame(); + int codec2_bits_per_frame(); + +private: + // merged from other files + void sample_phase(MODEL *model, std::complex filter_phase[], std::complex A[]); + void phase_synth_zero_order(int n_samp, MODEL *model, float *ex_phase, std::complex filter_phase[]); + void postfilter(MODEL *model, float *bg_est); + + C2CONST c2const_create(int Fs, float framelength_ms); + + void make_analysis_window(C2CONST *c2const, FFT_STATE *fft_fwd_cfg, float w[], float W[]); + void dft_speech(C2CONST *c2const, FFT_STATE &fft_fwd_cfg, std::complex Sw[], float Sn[], float w[]); + void two_stage_pitch_refinement(C2CONST *c2const, MODEL *model, std::complex Sw[]); + void estimate_amplitudes(MODEL *model, std::complex Sw[], int est_phase); + float est_voicing_mbe(C2CONST *c2const, MODEL *model, std::complex Sw[], float W[]); + void make_synthesis_window(C2CONST *c2const, float Pn[]); + void synthesise(int n_samp, FFTR_STATE *fftr_inv_cfg, float Sn_[], MODEL *model, float Pn[], int shift); + int codec2_rand(void); + void hs_pitch_refinement(MODEL *model, std::complex Sw[], float pmin, float pmax, float pstep); + + void interp_Wo(MODEL *interp, MODEL *prev, MODEL *next, float Wo_min); + void interp_Wo2(MODEL *interp, MODEL *prev, MODEL *next, float weight, float Wo_min); + float interp_energy(float prev, float next); + void interpolate_lsp_ver2(float interp[], float prev[], float next[], float weight, int order); + + void analyse_one_frame(MODEL *model, const short *speech); + void synthesise_one_frame(short speech[], MODEL *model, std::complex Aw[], float gain); + void codec2_encode_3200(unsigned char *bits, const short *speech); + void codec2_encode_1600(unsigned char *bits, const short *speech); + void codec2_decode_3200(short *speech, const unsigned char *bits); + void codec2_decode_1600(short *speech, const unsigned char *bits); + void ear_protection(float in_out[], int n); + void lsp_to_lpc(float *freq, float *ak, int lpcrdr); + + void (CCodec2::*encode)(unsigned char *bits, const short *speech); + void (CCodec2::*decode)(short *speech, const unsigned char *bits); + Cnlp nlp; + CQuantize qt; + CODEC2 c2; +}; + +#endif diff --git a/codec2/codec2_internal.h b/codec2/codec2_internal.h new file mode 100644 index 0000000..1b037e1 --- /dev/null +++ b/codec2/codec2_internal.h @@ -0,0 +1,67 @@ +/*---------------------------------------------------------------------------*\ + + FILE........: codec2_internal.h + AUTHOR......: David Rowe + DATE CREATED: April 16 2012 + + Header file for Codec2 internal states, exposed via this header + file to assist in testing. + +\*---------------------------------------------------------------------------*/ + +/* + Copyright (C) 2012 David Rowe + + All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License version 2.1, as + published by the Free Software Foundation. This program is + distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, see . +*/ + +#ifndef __CODEC2_INTERNAL__ +#define __CODEC2_INTERNAL__ + +#include "kiss_fft.h" + +using CODEC2 = struct codec2_tag { + int mode; + int Fs; + int n_samp; + int m_pitch; + int gray; /* non-zero for gray encoding */ + int lpc_pf; /* LPC post filter on */ + int bass_boost; /* LPC post filter bass boost */ + int smoothing; /* enable smoothing for channels with errors */ + float ex_phase; /* excitation model phase track */ + float bg_est; /* background noise estimate for post filter */ + float prev_f0_enc; /* previous frame's f0 estimate */ + float prev_e_dec; /* previous frame's LPC energy */ + float beta; /* LPC post filter parameters */ + float gamma; + float xq_enc[2]; /* joint pitch and energy VQ states */ + float xq_dec[2]; + float W[FFT_ENC]; /* DFT of w[] */ + float hpf_states[2]; /* high pass filter states */ + float prev_lsps_dec[LPC_ORD]; /* previous frame's LSPs */ + float *softdec; /* optional soft decn bits from demod */ + MODEL prev_model_dec; /* previous frame's model parameters */ + C2CONST c2const; + FFT_STATE fft_fwd_cfg; /* forward FFT config */ + FFTR_STATE fftr_fwd_cfg; /* forward real FFT config */ + FFTR_STATE fftr_inv_cfg; /* inverse FFT config */ + std::vector w; /* [m_pitch] time domain hamming window */ + std::vector Pn; /* [2*n_samp] trapezoidal synthesis window */ + std::vector Sn; /* [m_pitch] input speech */ + std::vector Sn_; /* [2*n_samp] synthesised output speech */ + std::vector bpf_buf; /* buffer for band pass filter */ +}; + +#endif diff --git a/codec2/defines.h b/codec2/defines.h new file mode 100644 index 0000000..f201313 --- /dev/null +++ b/codec2/defines.h @@ -0,0 +1,127 @@ +/*---------------------------------------------------------------------------*\ + + FILE........: defines.h + AUTHOR......: David Rowe + DATE CREATED: 23/4/93 + + Defines and structures used throughout the codec. + +\*---------------------------------------------------------------------------*/ + +/* + Copyright (C) 2009 David Rowe + + All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License version 2.1, as + published by the Free Software Foundation. This program is + distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, see . +*/ + +#ifndef __DEFINES__ +#define __DEFINES__ + +#include +#include + +/*---------------------------------------------------------------------------*\ + + DEFINES + +\*---------------------------------------------------------------------------*/ + +/* General defines */ + +#define N_S 0.01 /* internal proc frame length in secs */ +#define TW_S 0.005 /* trapezoidal synth window overlap */ +#define MAX_AMP 160 /* maximum number of harmonics */ +#ifndef PI +#define PI 3.141592654 /* mathematical constant */ +#endif +#define TWO_PI 6.283185307 /* mathematical constant */ +#define MAX_STR 2048 /* maximum string size */ + +#define FFT_ENC 512 /* size of FFT used for encoder */ +#define FFT_DEC 512 /* size of FFT used in decoder */ +#define V_THRESH 6.0 /* voicing threshold in dB */ +#define LPC_ORD 10 /* LPC order */ +#define LPC_ORD_LOW 6 /* LPC order for lower rates */ + +/* Pitch estimation defines */ + +#define M_PITCH_S 0.0400 /* pitch analysis window in s */ +#define P_MIN_S 0.0025 /* minimum pitch period in s */ +#define P_MAX_S 0.0200 /* maximum pitch period in s */ +#define MAXFACTORS 32 // e.g. an fft of length 128 has 4 factors + // as far as kissfft is concerned 4*4*4*2 + +/*---------------------------------------------------------------------------*\ + + TYPEDEFS + +\*---------------------------------------------------------------------------*/ + +/* Structure to hold constants calculated at run time based on sample rate */ + +using C2CONST = struct c2const_tag +{ + int Fs; /* sample rate of this instance */ + int n_samp; /* number of samples per 10ms frame at Fs */ + int max_amp; /* maximum number of harmonics */ + int m_pitch; /* pitch estimation window size in samples */ + int p_min; /* minimum pitch period in samples */ + int p_max; /* maximum pitch period in samples */ + float Wo_min; + float Wo_max; + int nw; /* analysis window size in samples */ + int tw; /* trapezoidal synthesis window overlap */ +}; + +/* Structure to hold model parameters for one frame */ + +using MODEL = struct model_tag +{ + float Wo; /* fundamental frequency estimate in radians */ + int L; /* number of harmonics */ + float A[MAX_AMP+1]; /* amplitiude of each harmonic */ + float phi[MAX_AMP+1]; /* phase of each harmonic */ + int voiced; /* non-zero if this frame is voiced */ +}; + +/* describes each codebook */ + +struct lsp_codebook +{ + int k; /* dimension of vector */ + int log2m; /* number of bits in m */ + int m; /* elements in codebook */ + float *cb; /* The elements */ +}; + +using FFT_STATE = struct fft_state_tag +{ + int nfft; + bool inverse; + int factors[2*MAXFACTORS]; + std::vector> twiddles; +}; + +using FFTR_STATE = struct fftr_state_tag +{ + FFT_STATE substate; + std::vector> tmpbuf; + std::vector> super_twiddles; +}; + +extern const struct lsp_codebook lsp_cb[]; +extern const struct lsp_codebook lsp_cbd[]; +extern const struct lsp_codebook ge_cb[]; + +#endif diff --git a/codec2/kiss_fft.cpp b/codec2/kiss_fft.cpp new file mode 100644 index 0000000..cefb846 --- /dev/null +++ b/codec2/kiss_fft.cpp @@ -0,0 +1,435 @@ +/* +Copyright (c) 2003-2010, Mark Borgerding + +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include +#include + +#include "defines.h" +#include "kiss_fft.h" + +void CKissFFT::kf_bfly2(std::complex *Fout, const size_t fstride, FFT_STATE &st, int m) +{ + std::complex *Fout2; + std::complex *tw1 = st.twiddles.data(); + std::complex t; + Fout2 = Fout + m; + do + { + t = *Fout2 * *tw1; + tw1 += fstride; + *Fout2 = *Fout - t; + *Fout += t; + ++Fout2; + ++Fout; + } + while (--m); +} + +void CKissFFT::kf_bfly3(std::complex * Fout, const size_t fstride, FFT_STATE &st, int m) +{ + const size_t m2 = 2 * m; + std::complex *tw1,*tw2; + std::complex scratch[5]; + std::complex epi3; + epi3 = st.twiddles[fstride*m]; + + tw1 = tw2 = st.twiddles.data(); + + do + { + scratch[1] = Fout[m] * *tw1; + scratch[2] = Fout[m2] * *tw2; + + scratch[3] = scratch[1] + scratch[2]; + scratch[0] = scratch[1] - scratch[2]; + tw1 += fstride; + tw2 += fstride*2; + + Fout[m] = *Fout - (0.5f * scratch[3]); + + scratch[0] *= epi3.imag(); + + *Fout += scratch[3]; + + Fout[m2].real(Fout[m].real() + scratch[0].imag()); + Fout[m2].imag(Fout[m].imag() - scratch[0].real()); + + Fout[m].real(Fout[m].real() - scratch[0].imag()); + Fout[m].imag(Fout[m].imag() + scratch[0].real()); + + ++Fout; + } + while(--m); +} + +void CKissFFT::kf_bfly4(std::complex *Fout, const size_t fstride, FFT_STATE &st, int m) +{ + std::complex *tw1,*tw2,*tw3; + std::complex scratch[6]; + int k = m; + const int m2 = 2 * m; + const int m3 = 3 * m; + + + tw3 = tw2 = tw1 = st.twiddles.data(); + + do + { + scratch[0] = Fout[m] * *tw1; + scratch[1] = Fout[m2] * *tw2; + scratch[2] = Fout[m3] * *tw3; + + scratch[5] = *Fout - scratch[1]; + *Fout += scratch[1]; + scratch[3] = scratch[0] + scratch[2]; + scratch[4] = scratch[0] - scratch[2]; + Fout[m2] = *Fout - scratch[3]; + tw1 += fstride; + tw2 += fstride*2; + tw3 += fstride*3; + *Fout += scratch[3]; + + if(st.inverse) + { + Fout[m].real(scratch[5].real() - scratch[4].imag()); + Fout[m].imag(scratch[5].imag() + scratch[4].real()); + Fout[m3].real(scratch[5].real() + scratch[4].imag()); + Fout[m3].imag(scratch[5].imag() - scratch[4].real()); + } + else + { + Fout[m].real(scratch[5].real() + scratch[4].imag()); + Fout[m].imag(scratch[5].imag() - scratch[4].real()); + Fout[m3].real(scratch[5].real() - scratch[4].imag()); + Fout[m3].imag(scratch[5].imag() + scratch[4].real()); + } + ++Fout; + } + while(--k); +} + +void CKissFFT::kf_bfly5(std::complex * Fout, const size_t fstride, FFT_STATE &st, int m) +{ + std::complex scratch[13]; + std::complex *twiddles = st.twiddles.data(); + auto ya = twiddles[fstride*m]; + auto yb = twiddles[fstride*2*m]; + + auto Fout0 = Fout; + auto Fout1 = Fout0 + m; + auto Fout2 = Fout0 + 2 * m; + auto Fout3 = Fout0 + 3 * m; + auto Fout4 = Fout0 + 4 * m; + + auto tw = st.twiddles.data(); + for (int u=0; u *Fout, const size_t fstride, FFT_STATE &st, int m, int p) +{ + auto twiddles = st.twiddles.data(); + std::complex t; + int Norig = st.nfft; + + std::vector> scratch(p); + + for (int u=0; u= Norig) twidx-=Norig; + t = scratch[q] * twiddles[twidx]; + Fout[k] += t; + } + k += m; + } + } + scratch.clear(); +} + +void CKissFFT::kf_work(std::complex *Fout, const std::complex *f, const size_t fstride, int in_stride, int *factors, FFT_STATE &st) +{ + auto Fout_beg = Fout; + const int p = *factors++; /* the radix */ + const int m = *factors++; /* stage's fft length/p */ + const std::complex *Fout_end = Fout + p*m; + + if (m==1) + { + do + { + *Fout = *f; + f += fstride*in_stride; + } + while( ++Fout != Fout_end ); + } + else + { + do + { + // recursive call: + // DFT of size m*p performed by doing + // p instances of smaller DFTs of size m, + // each one takes a decimated version of the input + kf_work( Fout, f, fstride*p, in_stride, factors, st); + f += fstride*in_stride; + } + while( (Fout += m) != Fout_end ); + } + + Fout=Fout_beg; + + // recombine the p smaller DFTs + switch (p) + { + case 2: + kf_bfly2(Fout,fstride,st,m); + break; + case 3: + kf_bfly3(Fout,fstride,st,m); + break; + case 4: + kf_bfly4(Fout,fstride,st,m); + break; + case 5: + kf_bfly5(Fout,fstride,st,m); + break; + default: + kf_bfly_generic(Fout,fstride,st,m,p); + break; + } +} + +/* facbuf is populated by p1,m1,p2,m2, ... + where + p[i] * m[i] = m[i-1] + m0 = n */ +void CKissFFT::kf_factor(int n,int * facbuf) +{ + int p=4; + double floor_sqrt; + floor_sqrt = floorf( sqrtf((double)n) ); + + /*factor out powers of 4, powers of 2, then any remaining primes */ + do + { + while (n % p) + { + switch (p) + { + case 4: + p = 2; + break; + case 2: + p = 3; + break; + default: + p += 2; + break; + } + if (p > floor_sqrt) + p = n; /* no more factors, skip to end */ + } + n /= p; + *facbuf++ = p; + *facbuf++ = n; + } + while (n > 1); +} + +void CKissFFT::fft_alloc(FFT_STATE &state, const int nfft, bool inverse_fft) +{ + state.twiddles.resize(nfft); + + state.nfft = nfft; + state.inverse = inverse_fft; + + for (int i=0; i *fin, std::complex *fout, int in_stride) +{ + if (fin == fout) + { + //NOTE: this is not really an in-place FFT algorithm. + //It just performs an out-of-place FFT into a temp buffer + std::vector> tmpbuf(st.nfft); + kf_work(tmpbuf.data(), fin, true, in_stride, st.factors, st); + memcpy(fout, tmpbuf.data(), sizeof(std::complex)*st.nfft); + tmpbuf.clear(); + } + else + { + kf_work(fout, fin, 1, in_stride, st.factors, st); + } +} + +void CKissFFT::fft(FFT_STATE &cfg, const std::complex *fin, std::complex *fout) +{ + fft_stride(cfg, fin, fout, 1); +} + +int CKissFFT::fft_next_fast_size(int n) +{ + while(1) + { + int m = n; + while ( (m % 2) == 0 ) m /= 2; + while ( (m % 3) == 0 ) m /= 3; + while ( (m % 5) == 0 ) m /= 5; + if (m <= 1) + break; /* n is completely factorable by twos, threes, and fives */ + n++; + } + return n; +} + +void CKissFFT::fftr_alloc(FFTR_STATE &st, int nfft, const bool inverse_fft) +{ + nfft >>= 1; + + fft_alloc(st.substate, nfft, inverse_fft); + st.tmpbuf.resize(nfft); + st.super_twiddles.resize(nfft); + + for (int i=0; i *freqdata) +{ + assert(st.substate.inverse == false); + + auto ncfft = st.substate.nfft; + + /*perform the parallel fft of two real signals packed in real,imag*/ + fft( st.substate, (const std::complex*)timedata, st.tmpbuf.data()); + /* The real part of the DC element of the frequency spectrum in st->tmpbuf + * contains the sum of the even-numbered elements of the input time sequence + * The imag part is the sum of the odd-numbered elements + * + * The sum of tdc.r and tdc.i is the sum of the input time sequence. + * yielding DC of input time sequence + * The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1... + * yielding Nyquist bin of input time sequence + */ + + auto tdc = st.tmpbuf[0]; + freqdata[0].real(tdc.real() + tdc.imag()); + freqdata[ncfft].real(tdc.real() - tdc.imag()); + freqdata[ncfft].imag(0.f); + freqdata[0].imag(0.f); + + for (int k=1; k <= ncfft/2; ++k) + { + auto fpk = st.tmpbuf[k]; + auto fpnk = std::conj(st.tmpbuf[ncfft-k]); + + auto f1k = fpk + fpnk; + auto f2k = fpk - fpnk; + auto tw = f2k * st.super_twiddles[k-1]; + + freqdata[k] = 0.5f * (f1k + tw); + freqdata[ncfft-k].real(0.5f * (f1k.real() - tw.real())); + freqdata[ncfft-k].imag(0.5f * (tw.imag() - f1k.imag())); + } +} + +void CKissFFT::fftri(FFTR_STATE &st, const std::complex *freqdata, float *timedata) +{ + assert(st.substate.inverse == true); + + auto ncfft = st.substate.nfft; + + st.tmpbuf[0].real(freqdata[0].real() + freqdata[ncfft].real()); + st.tmpbuf[0].imag(freqdata[0].real() - freqdata[ncfft].real()); + + for (int k=1; k <= ncfft/2; ++k) + { + auto fk = freqdata[k]; + auto fnkc = std::conj(freqdata[ncfft - k]); + + auto fek = fk + fnkc; + auto tmp = fk - fnkc; + auto fok = tmp * st.super_twiddles[k-1]; + st.tmpbuf[k] = fek + fok; + st.tmpbuf[ncfft - k] = std::conj(fek - fok); + } + fft (st.substate, st.tmpbuf.data(), (std::complex *)timedata); +} diff --git a/codec2/kiss_fft.h b/codec2/kiss_fft.h new file mode 100644 index 0000000..7626617 --- /dev/null +++ b/codec2/kiss_fft.h @@ -0,0 +1,35 @@ +#ifndef KISS_FFT_H +#define KISS_FFT_H + +#include + +#include +#include +#include +#include + +#include "defines.h" + +/* for real ffts, we need an even size */ +#define kiss_fftr_next_fast_size_real(n) (kiss_fft_next_fast_size( ((n)+1) >> 1) << 1 ) + +class CKissFFT +{ +public: + void fft_alloc(FFT_STATE &state, const int nfft, const bool inverse_fft); + void fft(FFT_STATE &cfg, const std::complex *fin, std::complex *fout); + void fft_stride(FFT_STATE &cfg, const std::complex *fin, std::complex *fout, int fin_stride); + int fft_next_fast_size(int n); + void fftr_alloc(FFTR_STATE &state, int nfft, const bool inverse_fft); + void fftr(FFTR_STATE &cfg,const float *timedata,std::complex *freqdata); + void fftri(FFTR_STATE &cfg,const std::complex *freqdata,float *timedata); +private: + void kf_bfly2(std::complex *Fout, const size_t fstride, FFT_STATE &st, int m); + void kf_bfly3(std::complex *Fout, const size_t fstride, FFT_STATE &st, int m); + void kf_bfly4(std::complex *Fout, const size_t fstride, FFT_STATE &st, int m); + void kf_bfly5(std::complex *Fout, const size_t fstride, FFT_STATE &st, int m); + void kf_bfly_generic(std::complex *Fout, const size_t fstride, FFT_STATE &st, int m, int p); + void kf_work(std::complex *Fout, const std::complex *f, const size_t fstride, int in_stride, int *factors, FFT_STATE &st); + void kf_factor(int n, int *facbuf); +}; +#endif diff --git a/codec2/lpc.cpp b/codec2/lpc.cpp new file mode 100644 index 0000000..10f2c09 --- /dev/null +++ b/codec2/lpc.cpp @@ -0,0 +1,311 @@ +/*---------------------------------------------------------------------------*\ + + FILE........: lpc.c + AUTHOR......: David Rowe + DATE CREATED: 30 Sep 1990 (!) + + Linear Prediction functions written in C. + +\*---------------------------------------------------------------------------*/ + +/* + Copyright (C) 2009-2012 David Rowe + + All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License version 2.1, as + published by the Free Software Foundation. This program is + distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, see . +*/ + +#define LPC_MAX_N 512 /* maximum no. of samples in frame */ +#define PI 3.141592654 /* mathematical constant */ + +#define ALPHA 1.0 +#define BETA 0.94 + +#include +#include +#include "defines.h" +#include "lpc.h" + +/*---------------------------------------------------------------------------*\ + + pre_emp() + + Pre-emphasise (high pass filter with zero close to 0 Hz) a frame of + speech samples. Helps reduce dynamic range of LPC spectrum, giving + greater weight and hense a better match to low energy formants. + + Should be balanced by de-emphasis of the output speech. + +\*---------------------------------------------------------------------------*/ + +void Clpc::pre_emp( + float Sn_pre[], /* output frame of speech samples */ + float Sn[], /* input frame of speech samples */ + float *mem, /* Sn[-1]single sample memory */ + int Nsam /* number of speech samples to use */ +) +{ + int i; + + for(i=0; i 1.0) + k = 0.0; + + a[i][i] = k; + + for(j=1; j<=i-1; j++) + a[i][j] = a[i-1][j] + k*a[i-1][i-j]; /* Equation 38c, Makhoul */ + + e *= (1-k*k); /* Equation 38d, Makhoul */ + } + + for(i=1; i<=order; i++) + lpcs[i] = a[order][i]; + lpcs[0] = 1.0; +} + +/*---------------------------------------------------------------------------*\ + + inverse_filter() + + Inverse Filter, A(z). Produces an array of residual samples from an array + of input samples and linear prediction coefficients. + + The filter memory is stored in the first order samples of the input array. + +\*---------------------------------------------------------------------------*/ + +void Clpc::inverse_filter( + float Sn[], /* Nsam input samples */ + float a[], /* LPCs for this frame of samples */ + int Nsam, /* number of samples */ + float res[], /* Nsam residual samples */ + int order /* order of LPC */ +) +{ + int i,j; /* loop variables */ + + for(i=0; i. +*/ + +#ifndef __LPC__ +#define __LPC__ + +#define LPC_MAX_ORDER 20 + +class Clpc { +public: + void autocorrelate(float Sn[], float Rn[], int Nsam, int order); + void levinson_durbin(float R[], float lpcs[], int order); +private: + void pre_emp(float Sn_pre[], float Sn[], float *mem, int Nsam); + void de_emp(float Sn_se[], float Sn[], float *mem, int Nsam); + void hanning_window(float Sn[], float Wn[], int Nsam); + void inverse_filter(float Sn[], float a[], int Nsam, float res[], int order); + void synthesis_filter(float res[], float a[], int Nsam, int order, float Sn_[]); + void find_aks(float Sn[], float a[], int Nsam, int order, float *E); + void weight(float ak[], float gamma, int order, float akw[]); +}; + +#endif diff --git a/codec2/nlp.cpp b/codec2/nlp.cpp new file mode 100644 index 0000000..753e96b --- /dev/null +++ b/codec2/nlp.cpp @@ -0,0 +1,520 @@ +/*---------------------------------------------------------------------------*\ + + FILE........: nlp.c + AUTHOR......: David Rowe + DATE CREATED: 23/3/93 + + Non Linear Pitch (NLP) estimation functions. + +\*---------------------------------------------------------------------------*/ + +/* + Copyright (C) 2009 David Rowe + + All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License version 2.1, as + published by the Free Software Foundation. This program is + distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, see . +*/ + +#include +#include +#include + +#include "defines.h" +#include "nlp.h" +#include "kiss_fft.h" + +extern CKissFFT kiss; + +/*---------------------------------------------------------------------------*\ + + GLOBALS + +\*---------------------------------------------------------------------------*/ + +/* 48 tap 600Hz low pass FIR filter coefficients */ + +static const float nlp_fir[] = +{ + -1.0818124e-03, + -1.1008344e-03, + -9.2768838e-04, + -4.2289438e-04, + 5.5034190e-04, + 2.0029849e-03, + 3.7058509e-03, + 5.1449415e-03, + 5.5924666e-03, + 4.3036754e-03, + 8.0284511e-04, + -4.8204610e-03, + -1.1705810e-02, + -1.8199275e-02, + -2.2065282e-02, + -2.0920610e-02, + -1.2808831e-02, + 3.2204775e-03, + 2.6683811e-02, + 5.5520624e-02, + 8.6305944e-02, + 1.1480192e-01, + 1.3674206e-01, + 1.4867556e-01, + 1.4867556e-01, + 1.3674206e-01, + 1.1480192e-01, + 8.6305944e-02, + 5.5520624e-02, + 2.6683811e-02, + 3.2204775e-03, + -1.2808831e-02, + -2.0920610e-02, + -2.2065282e-02, + -1.8199275e-02, + -1.1705810e-02, + -4.8204610e-03, + 8.0284511e-04, + 4.3036754e-03, + 5.5924666e-03, + 5.1449415e-03, + 3.7058509e-03, + 2.0029849e-03, + 5.5034190e-04, + -4.2289438e-04, + -9.2768838e-04, + -1.1008344e-03, + -1.0818124e-03 +}; + +static const float fdmdv_os_filter[]= { + -0.0008215855034550382, + -0.0007833023901802921, + 0.001075563790768233, + 0.001199092367787555, + -0.001765309502928316, + -0.002055372115328064, + 0.002986877604154257, + 0.003462567920638414, + -0.004856570111126334, + -0.005563143845031497, + 0.007533613299748122, + 0.008563932468880897, + -0.01126857129039911, + -0.01280782411693687, + 0.01651443896361847, + 0.01894875110322284, + -0.02421604439474981, + -0.02845107338464062, + 0.03672973563400258, + 0.04542046150312214, + -0.06189165826716491, + -0.08721876380763803, + 0.1496157094199961, + 0.4497962274137046, + 0.4497962274137046, + 0.1496157094199961, + -0.08721876380763803, + -0.0618916582671649, + 0.04542046150312216, + 0.03672973563400257, + -0.02845107338464062, + -0.02421604439474984, + 0.01894875110322284, + 0.01651443896361848, + -0.01280782411693687, + -0.0112685712903991, + 0.008563932468880899, + 0.007533613299748123, + -0.005563143845031501, + -0.004856570111126346, + 0.003462567920638419, + 0.002986877604154259, + -0.002055372115328063, + -0.001765309502928318, + 0.001199092367787557, + 0.001075563790768233, + -0.0007833023901802925, + -0.0008215855034550383 +}; + +/*---------------------------------------------------------------------------*\ + + nlp_create() + + Initialisation function for NLP pitch estimator. + +\*---------------------------------------------------------------------------*/ + +void Cnlp::nlp_create(C2CONST *c2const) +{ + int i; + int m = c2const->m_pitch; + int Fs = c2const->Fs; + + assert((Fs == 8000) || (Fs == 16000)); + snlp.Fs = Fs; + + snlp.m = m; + + /* if running at 16kHz allocate storage for decimating filter memory */ + + if (Fs == 16000) + { + snlp.Sn16k.resize(FDMDV_OS_TAPS_16K + c2const->n_samp); + for(i=0; i Sw[], /* Freq domain version of Sn[] */ +// float W[], /* Freq domain window */ + float *prev_f0 /* previous pitch f0 in Hz, memory for pitch tracking */ +) +{ + float notch; /* current notch filter output */ + std::complex Fw[PE_FFT_SIZE]; /* DFT of squared signal (input/output) */ + float gmax; + int gmax_bin; + int m, i, j; + float best_f0; + + m = snlp.m; + + /* Square, notch filter at DC, and LP filter vector */ + + /* If running at 16 kHz decimate to 8 kHz, as NLP ws designed for + Fs = 8kHz. The decimating filter introduces about 3ms of delay, + that shouldn't be a problem as pitch changes slowly. */ + + if (snlp.Fs == 8000) + { + /* Square latest input samples */ + + for(i=m-n; i gmax) + { + gmax = Fw[i].real(); + gmax_bin = i; + } + } + + best_f0 = post_process_sub_multiples(Fw, pmax, gmax, gmax_bin, prev_f0); + + /* Shift samples in buffer to make room for new samples */ + + for(i=0; i Fw[], int pmax, float gmax, int gmax_bin, float *prev_f0) +{ + int min_bin, cmax_bin; + int mult; + float thresh, best_f0; + int b, bmin, bmax, lmax_bin; + float lmax; + int prev_f0_bin; + + /* post process estimate by searching submultiples */ + + mult = 2; + min_bin = PE_FFT_SIZE*DEC/pmax; + cmax_bin = gmax_bin; + prev_f0_bin = *prev_f0*(PE_FFT_SIZE*DEC)/SAMPLE_RATE; + + while(gmax_bin/mult >= min_bin) + { + + b = gmax_bin/mult; /* determine search interval */ + bmin = 0.8*b; + bmax = 1.2*b; + if (bmin < min_bin) + bmin = min_bin; + + /* lower threshold to favour previous frames pitch estimate, + this is a form of pitch tracking */ + + if ((prev_f0_bin > bmin) && (prev_f0_bin < bmax)) + thresh = CNLP*0.5*gmax; + else + thresh = CNLP*gmax; + + lmax = 0; + lmax_bin = bmin; + for (b=bmin; b<=bmax; b++) /* look for maximum in interval */ + if (Fw[b].real() > lmax) + { + lmax = Fw[b].real(); + lmax_bin = b; + } + + if (lmax > thresh) + if ((lmax > Fw[lmax_bin-1].real()) && (lmax > Fw[lmax_bin+1].real())) + { + cmax_bin = lmax_bin; + } + + mult++; + } + + best_f0 = (float)cmax_bin*SAMPLE_RATE/(PE_FFT_SIZE*DEC); + + return best_f0; +} + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: fdmdv_16_to_8() + AUTHOR......: David Rowe + DATE CREATED: 9 May 2012 + + Changes the sample rate of a signal from 16 to 8 kHz. + + n is the number of samples at the 8 kHz rate, there are FDMDV_OS*n + samples at the 48 kHz rate. As above however a memory of + FDMDV_OS_TAPS samples is reqd for in16k[] (see t16_8.c unit test as example). + + Low pass filter the 16 kHz signal at 4 kHz using the same filter as + the upsampler, then just output every FDMDV_OS-th filtered sample. + + Note: this function copied from fdmdv.c, included in nlp.c as a convenience + to avoid linking with another source file. + +\*---------------------------------------------------------------------------*/ + +void Cnlp::fdmdv_16_to_8(float out8k[], float in16k[], int n) +{ + float acc; + int i,j,k; + + for(i=0, k=0; k *inout) +{ + std::complex in[512]; + // decide whether to use the local stack based buffer for in + // or to allow kiss_fft to allocate RAM + // second part is just to play safe since first method + // is much faster and uses less RAM + if (cfg.nfft <= 512) + { + memcpy(in, inout, cfg.nfft*sizeof(std::complex)); + kiss.fft(cfg, in, inout); + } + else + { + kiss.fft(cfg, inout, inout); + } +} diff --git a/codec2/nlp.h b/codec2/nlp.h new file mode 100644 index 0000000..e531a90 --- /dev/null +++ b/codec2/nlp.h @@ -0,0 +1,87 @@ +/*---------------------------------------------------------------------------*\ + + FILE........: nlp.c + AUTHOR......: David Rowe + DATE CREATED: 23/3/93 + + Non Linear Pitch (NLP) estimation functions. + +\*---------------------------------------------------------------------------*/ + +/* + Copyright (C) 2009 David Rowe + + All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License version 2.1, as + published by the Free Software Foundation. This program is + distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, see . +*/ + +#ifndef __NLP__ +#define __NLP__ + +#include +#include + +#include "defines.h" + +/*---------------------------------------------------------------------------*\ + + DEFINES + +\*---------------------------------------------------------------------------*/ + +#define PMAX_M 320 /* maximum NLP analysis window size */ +#define COEFF 0.95 /* notch filter parameter */ +#define PE_FFT_SIZE 512 /* DFT size for pitch estimation */ +#define DEC 5 /* decimation factor */ +#define SAMPLE_RATE 8000 +#define PI 3.141592654 /* mathematical constant */ +#define T 0.1 /* threshold for local minima candidate */ +#define F0_MAX 500 +#define CNLP 0.3 /* post processor constant */ +#define NLP_NTAP 48 /* Decimation LPF order */ + +/* 8 to 16 kHz sample rate conversion */ + +#define FDMDV_OS 2 /* oversampling rate */ +#define FDMDV_OS_TAPS_16K 48 /* number of OS filter taps at 16kHz */ +#define FDMDV_OS_TAPS_8K (FDMDV_OS_TAPS_16K/FDMDV_OS) /* number of OS filter taps at 8kHz */ + + +using NLP = struct nlp_tag +{ + int Fs; /* sample rate in Hz */ + int m; + float w[PMAX_M/DEC]; /* DFT window */ + float sq[PMAX_M]; /* squared speech samples */ + float mem_x,mem_y; /* memory for notch filter */ + float mem_fir[NLP_NTAP]; /* decimation FIR filter memory */ + FFT_STATE fft_cfg; /* kiss FFT config */ + std::vector Sn16k; /* Fs=16kHz input speech vector */ +}; + + +class Cnlp { +public: + void nlp_create(C2CONST *c2const); + void nlp_destroy(); + float nlp(float Sn[], int n, float *pitch_samples, float *prev_f0); + void codec2_fft_inplace(FFT_STATE &cfg, std::complex *inout); + +private: + float post_process_sub_multiples(std::complex Fw[], int pmax, float gmax, int gmax_bin, float *prev_f0); + void fdmdv_16_to_8(float out8k[], float in16k[], int n); + + NLP snlp; +}; + +#endif diff --git a/codec2/pack.cpp b/codec2/pack.cpp new file mode 100644 index 0000000..8382f95 --- /dev/null +++ b/codec2/pack.cpp @@ -0,0 +1,139 @@ +/* + Copyright (C) 2010 Perens LLC + + All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License version 2.1, as + published by the Free Software Foundation. This program is + distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, see . +*/ + +#include "defines.h" +#include "quantise.h" +#include + +/* Compile-time constants */ +/* Size of unsigned char in bits. Assumes 8 bits-per-char. */ +static const unsigned int WordSize = 8; + +/* Mask to pick the bit component out of bitIndex. */ +static const unsigned int IndexMask = 0x7; + +/* Used to pick the word component out of bitIndex. */ +static const unsigned int ShiftRight = 3; + +/** Pack a bit field into a bit string, encoding the field in Gray code. + * + * The output is an array of unsigned char data. The fields are efficiently + * packed into the bit string. The Gray coding is a naive attempt to reduce + * the effect of single-bit errors, we expect to do a better job as the + * codec develops. + * + * This code would be simpler if it just set one bit at a time in the string, + * but would hit the same cache line more often. I'm not sure the complexity + * gains us anything here. + * + * Although field is currently of int type rather than unsigned for + * compatibility with the rest of the code, indices are always expected to + * be >= 0. + */ +void CQuantize::pack( + unsigned char *bitArray, /* The output bit string. */ + unsigned int *bitIndex, /* Index into the string in BITS, not bytes.*/ + int field, /* The bit field to be packed. */ + unsigned int fieldWidth /* Width of the field in BITS, not bytes. */ +) +{ + pack_natural_or_gray(bitArray, bitIndex, field, fieldWidth, 1); +} + +void CQuantize::pack_natural_or_gray( + unsigned char *bitArray, /* The output bit string. */ + unsigned int *bitIndex, /* Index into the string in BITS, not bytes.*/ + int field, /* The bit field to be packed. */ + unsigned int fieldWidth, /* Width of the field in BITS, not bytes. */ + unsigned int gray /* non-zero for gray coding */ +) +{ + if (gray) + { + /* Convert the field to Gray code */ + field = (field >> 1) ^ field; + } + + do + { + unsigned int bI = *bitIndex; + unsigned int bitsLeft = WordSize - (bI & IndexMask); + unsigned int sliceWidth = bitsLeft < fieldWidth ? bitsLeft : fieldWidth; + unsigned int wordIndex = bI >> ShiftRight; + + bitArray[wordIndex] |= ((unsigned char)((field >> (fieldWidth - sliceWidth)) << (bitsLeft - sliceWidth))); + + *bitIndex = bI + sliceWidth; + fieldWidth -= sliceWidth; + } + while ( fieldWidth != 0 ); +} + +/** Unpack a field from a bit string, converting from Gray code to binary. + * + */ +int CQuantize::unpack( + const unsigned char *bitArray, /* The input bit string. */ + unsigned int *bitIndex, /* Index into the string in BITS, not bytes.*/ + unsigned int fieldWidth/* Width of the field in BITS, not bytes. */ +) +{ + return unpack_natural_or_gray(bitArray, bitIndex, fieldWidth, 1); +} + +/** Unpack a field from a bit string, to binary, optionally using + * natural or Gray code. + * + */ +int CQuantize::unpack_natural_or_gray( + const unsigned char *bitArray, /* The input bit string. */ + unsigned int *bitIndex, /* Index into the string in BITS, not bytes.*/ + unsigned int fieldWidth,/* Width of the field in BITS, not bytes. */ + unsigned int gray /* non-zero for Gray coding */ +) +{ + unsigned int field = 0; + unsigned int t; + + do + { + unsigned int bI = *bitIndex; + unsigned int bitsLeft = WordSize - (bI & IndexMask); + unsigned int sliceWidth = bitsLeft < fieldWidth ? bitsLeft : fieldWidth; + + field |= (((bitArray[bI >> ShiftRight] >> (bitsLeft - sliceWidth)) & ((1 << sliceWidth) - 1)) << (fieldWidth - sliceWidth)); + + *bitIndex = bI + sliceWidth; + fieldWidth -= sliceWidth; + } + while ( fieldWidth != 0 ); + + if (gray) + { + /* Convert from Gray code to binary. Works for maximum 8-bit fields. */ + t = field ^ (field >> 8); + t ^= (t >> 4); + t ^= (t >> 2); + t ^= (t >> 1); + } + else + { + t = field; + } + + return t; +} diff --git a/codec2/qbase.cpp b/codec2/qbase.cpp new file mode 100644 index 0000000..a94ef46 --- /dev/null +++ b/codec2/qbase.cpp @@ -0,0 +1,247 @@ +#include +#include + +#include "qbase.h" + +/*---------------------------------------------------------------------------*\ + + quantise + + Quantises vec by choosing the nearest vector in codebook cb, and + returns the vector index. The squared error of the quantised vector + is added to se. + +\*---------------------------------------------------------------------------*/ + +long CQbase::quantise(const float *cb, float vec[], float w[], int k, int m, float *se) +/* float cb[][K]; current VQ codebook */ +/* float vec[]; vector to quantise */ +/* float w[]; weighting vector */ +/* int k; dimension of vectors */ +/* int m; size of codebook */ +/* float *se; accumulated squared error */ +{ + float e; /* current error */ + long besti; /* best index so far */ + float beste; /* best error so far */ + long j; + int i; + float diff; + + besti = 0; + beste = 1E32; + for(j=0; jWo/PI)*4000.0/50.0)/log10f(2); + x[1] = 10.0*log10f(1e-4 + e); + + compute_weights2(x, xq, w); + for (i=0; iWo_min; + float Wo_max = c2const->Wo_max; + + for (i=0; iWo = powf(2.0, xq[0])*(PI*50.0)/4000.0; + + /* bit errors can make us go out of range leading to all sorts of + probs like seg faults */ + + if (model->Wo > Wo_max) model->Wo = Wo_max; + if (model->Wo < Wo_min) model->Wo = Wo_min; + + model->L = PI/model->Wo; /* if we quantise Wo re-compute L */ + + *e = exp10f(xq[1]/10.0); +} + +void CQbase::compute_weights2(const float *x, const float *xp, float *w) +{ + w[0] = 30; + w[1] = 1; + if (x[1]<0) + { + w[0] *= .6; + w[1] *= .3; + } + if (x[1]<-10) + { + w[0] *= .3; + w[1] *= .3; + } + /* Higher weight if pitch is stable */ + if (fabsf(x[0]-xp[0])<.2) + { + w[0] *= 2; + w[1] *= 1.5; + } + else if (fabsf(x[0]-xp[0])>.5) /* Lower if not stable */ + { + w[0] *= .5; + } + + /* Lower weight for low energy */ + if (x[1] < xp[1]-10) + { + w[1] *= .5; + } + if (x[1] < xp[1]-20) + { + w[1] *= .5; + } + + //w[0] = 30; + //w[1] = 1; + + /* Square the weights because it's applied on the squared error */ + w[0] *= w[0]; + w[1] *= w[1]; + +} + +int CQbase::find_nearest_weighted(const float *codebook, int nb_entries, float *x, const float *w, int ndim) +{ + int i, j; + float min_dist = 1e15; + int nearest = 0; + + for (i=0; iWo_min; + float Wo_max = c2const->Wo_max; + float norm; + + norm = (log10f(Wo) - log10f(Wo_min))/(log10f(Wo_max) - log10f(Wo_min)); + index = floorf(Wo_levels * norm + 0.5); + if (index < 0 ) index = 0; + if (index > (Wo_levels-1)) index = Wo_levels-1; + + return index; +} + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: decode_log_Wo() + AUTHOR......: David Rowe + DATE CREATED: 22/8/2010 + + Decodes Wo using a WO_LEVELS quantiser in the log domain. + +\*---------------------------------------------------------------------------*/ + +float CQbase::decode_log_Wo(C2CONST *c2const, int index, int bits) +{ + float Wo_min = c2const->Wo_min; + float Wo_max = c2const->Wo_max; + float step; + float Wo; + int Wo_levels = 1<. + +*/ + +#include +#include +#include +#include +#include +#include + +#include "defines.h" +#include "quantise.h" +#include "lpc.h" +#include "kiss_fft.h" + +extern CKissFFT kiss; + +#define LSP_DELTA1 0.01 /* grid spacing for LSP root searches */ + +/*---------------------------------------------------------------------------*\ + + FUNCTIONS + +\*---------------------------------------------------------------------------*/ + +int CQuantize::lsp_bits(int i) +{ + return lsp_cb[i].log2m; +} + +int CQuantize::lspd_bits(int i) +{ + return lsp_cbd[i].log2m; +} + + + +/*---------------------------------------------------------------------------*\ + + encode_lspds_scalar() + + Scalar/VQ LSP difference quantiser. + +\*---------------------------------------------------------------------------*/ + +void CQuantize::encode_lspds_scalar(int indexes[], float lsp[], int order) +{ + int i,k,m; + float lsp_hz[order]; + float lsp__hz[order]; + float dlsp[order]; + float dlsp_[order]; + float wt[order]; + const float *cb; + float se; + + for(i=0; i Ww[FFT_ENC/2+1]; /* weighting spectrum */ + float Rw[FFT_ENC/2+1]; /* R = WA */ + float e_before, e_after, gain; + float Pfw; + float max_Rw, min_Rw; + float coeff; + + /* Determine weighting filter spectrum W(exp(jw)) ---------------*/ + + for(i=0; i max_Rw) + max_Rw = Rw[i]; + if (Rw[i] < min_Rw) + min_Rw = Rw[i]; + + } + + /* create post filter mag spectrum and apply ------------------*/ + + /* measure energy before post filtering */ + + e_before = 1E-4; + for(i=0; i Aw[] /* output power spectrum */ +) +{ + int i,m; /* loop variables */ + int am,bm; /* limits of current band */ + float r; /* no. rads/bin */ + float Em; /* energy in band */ + float Am; /* spectral amplitude sample */ + float signal, noise; + + r = TWO_PI/(FFT_ENC); + + /* Determine DFT of A(exp(jw)) --------------------------------------------*/ + { + float a[FFT_ENC]; /* input to FFT for power spectrum */ + + for(i=0; iL; m++) + { + am = (int)((m - 0.5)*model->Wo/r + 0.5); + bm = (int)((m + 0.5)*model->Wo/r + 0.5); + + // FIXME: With arm_rfft_fast_f32 we have to use this + // otherwise sometimes a to high bm is calculated + // which causes trouble later in the calculation + // chain + // it seems for some reason model->Wo is calculated somewhat too high + if (bm>FFT_ENC/2) + { + bm = FFT_ENC/2; + } + Em = 0.0; + + for(i=am; iA[m]*model->A[m]; + noise += (model->A[m] - Am)*(model->A[m] - Am); + + /* This code significantly improves perf of LPC model, in + particular when combined with phase0. The LPC spectrum tends + to track just under the peaks of the spectral envelope, and + just above nulls. This algorithm does the reverse to + compensate - raising the amplitudes of spectral peaks, while + attenuating the null. This enhances the formants, and + supresses the energy between formants. */ + + if (sim_pf) + { + if (Am > model->A[m]) + Am *= 0.7; + if (Am < model->A[m]) + Am *= 1.4; + } + model->A[m] = Am; + } + *snr = 10.0*log10f(signal/noise); +} + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: encode_Wo() + AUTHOR......: David Rowe + DATE CREATED: 22/8/2010 + + Encodes Wo using a WO_LEVELS quantiser. + +\*---------------------------------------------------------------------------*/ + +int CQuantize::encode_Wo(C2CONST *c2const, float Wo, int bits) +{ + int index, Wo_levels = 1<Wo_min; + float Wo_max = c2const->Wo_max; + float norm; + + norm = (Wo - Wo_min)/(Wo_max - Wo_min); + index = floorf(Wo_levels * norm + 0.5); + if (index < 0 ) index = 0; + if (index > (Wo_levels-1)) index = Wo_levels-1; + + return index; +} + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: decode_Wo() + AUTHOR......: David Rowe + DATE CREATED: 22/8/2010 + + Decodes Wo using a WO_LEVELS quantiser. + +\*---------------------------------------------------------------------------*/ + +float CQuantize::decode_Wo(C2CONST *c2const, int index, int bits) +{ + float Wo_min = c2const->Wo_min; + float Wo_max = c2const->Wo_max; + float step; + float Wo; + int Wo_levels = 1<Wo < (PI*150.0/4000)) + { + model->A[1] *= 0.032; + } +} + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: encode_energy() + AUTHOR......: David Rowe + DATE CREATED: 22/8/2010 + + Encodes LPC energy using an E_LEVELS quantiser. + +\*---------------------------------------------------------------------------*/ + +int CQuantize::encode_energy(float e, int bits) +{ + int index, e_levels = 1< (e_levels-1)) index = e_levels-1; + + return index; +} + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: decode_energy() + AUTHOR......: David Rowe + DATE CREATED: 22/8/2010 + + Decodes energy using a E_LEVELS quantiser. + +\*---------------------------------------------------------------------------*/ + +float CQuantize::decode_energy(int index, int bits) +{ + float e_min = E_MIN_DB; + float e_max = E_MAX_DB; + float step; + float e; + int e_levels = 1<= -1.0)) + { + xr = xl - delta ; /* interval spacing */ + psumr = cheb_poly_eva(pt,xr,order);/* poly(xl-delta_x) */ + temp_psumr = psumr; + temp_xr = xr; + + /* if no sign change increment xr and re-evaluate + poly(xr). Repeat til sign change. if a sign change has + occurred the interval is bisected and then checked again + for a sign change which determines in which interval the + zero lies in. If there is no sign change between poly(xm) + and poly(xl) set interval between xm and xr else set + interval between xl and xr and repeat till root is located + within the specified limits */ + + if(((psumr*psuml)<0.0) || (psumr == 0.0)) + { + roots++; + + psumm=psuml; + for(k=0; k<=nb; k++) + { + xm = (xl+xr)/2; /* bisect the interval */ + psumm=cheb_poly_eva(pt,xm,order); + if(psumm*psuml>0.) + { + psuml=psumm; + xl=xm; + } + else + { + psumr=psumm; + xr=xm; + } + } + + /* once zero is found, reset initial interval to xr */ + freq[j] = (xm); + xl = xm; + flag = 0; /* reset flag for next search */ + } + else + { + psuml=temp_psumr; + xl=temp_xr; + } + } + } + + /* convert from x domain to radians */ + + for(i=0; i. +*/ + +#ifndef __QUANTISE__ +#define __QUANTISE__ + +#include + +#include "qbase.h" + +class CQuantize : public CQbase { +public: + void aks_to_M2(FFTR_STATE *fftr_fwd_cfg, float ak[], int order, MODEL *model, float E, float *snr, int sim_pf, int pf, int bass_boost, float beta, float gamma, std::complex Aw[]); + + int encode_Wo(C2CONST *c2const, float Wo, int bits); + float decode_Wo(C2CONST *c2const, int index, int bits); + void encode_lsps_scalar(int indexes[], float lsp[], int order); + void decode_lsps_scalar(float lsp[], int indexes[], int order); + void encode_lspds_scalar(int indexes[], float lsp[], int order); + void decode_lspds_scalar(float lsp[], int indexes[], int order); + + int encode_energy(float e, int bits); + float decode_energy(int index, int bits); + + void pack(unsigned char * bits, unsigned int *nbit, int index, unsigned int index_bits); + void pack_natural_or_gray(unsigned char * bits, unsigned int *nbit, int index, unsigned int index_bits, unsigned int gray); + int unpack(const unsigned char * bits, unsigned int *nbit, unsigned int index_bits); + int unpack_natural_or_gray(const unsigned char * bits, unsigned int *nbit, unsigned int index_bits, unsigned int gray); + + int lsp_bits(int i); + int lspd_bits(int i); + + void apply_lpc_correction(MODEL *model); + float speech_to_uq_lsps(float lsp[], float ak[], float Sn[], float w[], int m_pitch, int order); + int check_lsp_order(float lsp[], int lpc_order); + void bw_expand_lsps(float lsp[], int order, float min_sep_low, float min_sep_high); + +private: + void compute_weights(const float *x, float *w, int ndim); + int find_nearest(const float *codebook, int nb_entries, float *x, int ndim); + void lpc_post_filter(FFTR_STATE *fftr_fwd_cfg, float Pw[], float ak[], int order, float beta, float gamma, int bass_boost, float E); + int lpc_to_lsp (float *a, int lpcrdr, float *freq, int nb, float delta); + float cheb_poly_eva(float *coef,float x,int order); +}; + +#endif