From 3533472690075104eb59f275e80359ecc91fb2aa Mon Sep 17 00:00:00 2001
From: Tom Early <n7tae@tearly.net>
Date: Mon, 29 Nov 2021 05:50:23 -0700
Subject: [PATCH] added codec2 from mvoice

---
 codec2/codebooks.cpp     |  964 +++++++++++++++++++++
 codec2/codec2.cpp        | 1732 ++++++++++++++++++++++++++++++++++++++
 codec2/codec2.h          |   98 +++
 codec2/codec2_internal.h |   67 ++
 codec2/defines.h         |  127 +++
 codec2/kiss_fft.cpp      |  435 ++++++++++
 codec2/kiss_fft.h        |   35 +
 codec2/lpc.cpp           |  311 +++++++
 codec2/lpc.h             |   47 ++
 codec2/nlp.cpp           |  520 ++++++++++++
 codec2/nlp.h             |   87 ++
 codec2/pack.cpp          |  139 +++
 codec2/qbase.cpp         |  247 ++++++
 codec2/qbase.h           |   39 +
 codec2/quantise.cpp      |  897 ++++++++++++++++++++
 codec2/quantise.h        |   68 ++
 16 files changed, 5813 insertions(+)
 create mode 100644 codec2/codebooks.cpp
 create mode 100644 codec2/codec2.cpp
 create mode 100644 codec2/codec2.h
 create mode 100644 codec2/codec2_internal.h
 create mode 100644 codec2/defines.h
 create mode 100644 codec2/kiss_fft.cpp
 create mode 100644 codec2/kiss_fft.h
 create mode 100644 codec2/lpc.cpp
 create mode 100644 codec2/lpc.h
 create mode 100644 codec2/nlp.cpp
 create mode 100644 codec2/nlp.h
 create mode 100644 codec2/pack.cpp
 create mode 100644 codec2/qbase.cpp
 create mode 100644 codec2/qbase.h
 create mode 100644 codec2/quantise.cpp
 create mode 100644 codec2/quantise.h

diff --git a/codec2/codebooks.cpp b/codec2/codebooks.cpp
new file mode 100644
index 0000000..bf319ce
--- /dev/null
+++ b/codec2/codebooks.cpp
@@ -0,0 +1,964 @@
+/*
+ * This intermediary file and the files that used to create it are under
+ * The LGPL. See the file COPYING.
+ */
+
+#include "defines.h"
+
+/* codebook/lsp1.txt */
+static float codes00[] =
+{
+	225,
+	250,
+	275,
+	300,
+	325,
+	350,
+	375,
+	400,
+	425,
+	450,
+	475,
+	500,
+	525,
+	550,
+	575,
+	600
+};
+/* codebook/lsp2.txt */
+static float codes01[] =
+{
+	325,
+	350,
+	375,
+	400,
+	425,
+	450,
+	475,
+	500,
+	525,
+	550,
+	575,
+	600,
+	625,
+	650,
+	675,
+	700
+};
+/* codebook/lsp3.txt */
+static float codes02[] =
+{
+	500,
+	550,
+	600,
+	650,
+	700,
+	750,
+	800,
+	850,
+	900,
+	950,
+	1000,
+	1050,
+	1100,
+	1150,
+	1200,
+	1250
+};
+/* codebook/lsp4.txt */
+static float codes03[] =
+{
+	700,
+	800,
+	900,
+	1000,
+	1100,
+	1200,
+	1300,
+	1400,
+	1500,
+	1600,
+	1700,
+	1800,
+	1900,
+	2000,
+	2100,
+	2200
+};
+/* codebook/lsp5.txt */
+static float codes04[] =
+{
+	950,
+	1050,
+	1150,
+	1250,
+	1350,
+	1450,
+	1550,
+	1650,
+	1750,
+	1850,
+	1950,
+	2050,
+	2150,
+	2250,
+	2350,
+	2450
+};
+/* codebook/lsp6.txt */
+static float codes05[] =
+{
+	1100,
+	1200,
+	1300,
+	1400,
+	1500,
+	1600,
+	1700,
+	1800,
+	1900,
+	2000,
+	2100,
+	2200,
+	2300,
+	2400,
+	2500,
+	2600
+};
+/* codebook/lsp7.txt */
+static float codes06[] =
+{
+	1500,
+	1600,
+	1700,
+	1800,
+	1900,
+	2000,
+	2100,
+	2200,
+	2300,
+	2400,
+	2500,
+	2600,
+	2700,
+	2800,
+	2900,
+	3000
+};
+/* codebook/lsp8.txt */
+static float codes07[] =
+{
+	2300,
+	2400,
+	2500,
+	2600,
+	2700,
+	2800,
+	2900,
+	3000
+};
+/* codebook/lsp9.txt */
+static float codes08[] =
+{
+	2500,
+	2600,
+	2700,
+	2800,
+	2900,
+	3000,
+	3100,
+	3200
+};
+/* codebook/lsp10.txt */
+static float codes09[] =
+{
+	2900,
+	3100,
+	3300,
+	3500
+};
+
+const struct lsp_codebook lsp_cb[] =
+{
+	/* codebook/lsp1.txt */
+	{
+		1,
+		4,
+		16,
+		codes00
+	},
+	/* codebook/lsp2.txt */
+	{
+		1,
+		4,
+		16,
+		codes01
+	},
+	/* codebook/lsp3.txt */
+	{
+		1,
+		4,
+		16,
+		codes02
+	},
+	/* codebook/lsp4.txt */
+	{
+		1,
+		4,
+		16,
+		codes03
+	},
+	/* codebook/lsp5.txt */
+	{
+		1,
+		4,
+		16,
+		codes04
+	},
+	/* codebook/lsp6.txt */
+	{
+		1,
+		4,
+		16,
+		codes05
+	},
+	/* codebook/lsp7.txt */
+	{
+		1,
+		4,
+		16,
+		codes06
+	},
+	/* codebook/lsp8.txt */
+	{
+		1,
+		3,
+		8,
+		codes07
+	},
+	/* codebook/lsp9.txt */
+	{
+		1,
+		3,
+		8,
+		codes08
+	},
+	/* codebook/lsp10.txt */
+	{
+		1,
+		2,
+		4,
+		codes09
+	},
+	{ 0, 0, 0, 0 }
+};
+
+/* codebook/dlsp1.txt */
+static float codes10[] =
+{
+	25,
+	50,
+	75,
+	100,
+	125,
+	150,
+	175,
+	200,
+	225,
+	250,
+	275,
+	300,
+	325,
+	350,
+	375,
+	400,
+	425,
+	450,
+	475,
+	500,
+	525,
+	550,
+	575,
+	600,
+	625,
+	650,
+	675,
+	700,
+	725,
+	750,
+	775,
+	800
+};
+/* codebook/dlsp2.txt */
+static float codes11[] =
+{
+	25,
+	50,
+	75,
+	100,
+	125,
+	150,
+	175,
+	200,
+	225,
+	250,
+	275,
+	300,
+	325,
+	350,
+	375,
+	400,
+	425,
+	450,
+	475,
+	500,
+	525,
+	550,
+	575,
+	600,
+	625,
+	650,
+	675,
+	700,
+	725,
+	750,
+	775,
+	800
+};
+/* codebook/dlsp3.txt */
+static float codes12[] =
+{
+	25,
+	50,
+	75,
+	100,
+	125,
+	150,
+	175,
+	200,
+	225,
+	250,
+	275,
+	300,
+	325,
+	350,
+	375,
+	400,
+	425,
+	450,
+	475,
+	500,
+	525,
+	550,
+	575,
+	600,
+	625,
+	650,
+	675,
+	700,
+	725,
+	750,
+	775,
+	800
+};
+/* codebook/dlsp4.txt */
+static float codes13[] =
+{
+	25,
+	50,
+	75,
+	100,
+	125,
+	150,
+	175,
+	200,
+	250,
+	300,
+	350,
+	400,
+	450,
+	500,
+	550,
+	600,
+	650,
+	700,
+	750,
+	800,
+	850,
+	900,
+	950,
+	1000,
+	1050,
+	1100,
+	1150,
+	1200,
+	1250,
+	1300,
+	1350,
+	1400
+};
+/* codebook/dlsp5.txt */
+static float codes14[] =
+{
+	25,
+	50,
+	75,
+	100,
+	125,
+	150,
+	175,
+	200,
+	250,
+	300,
+	350,
+	400,
+	450,
+	500,
+	550,
+	600,
+	650,
+	700,
+	750,
+	800,
+	850,
+	900,
+	950,
+	1000,
+	1050,
+	1100,
+	1150,
+	1200,
+	1250,
+	1300,
+	1350,
+	1400
+};
+/* codebook/dlsp6.txt */
+static float codes15[] =
+{
+	25,
+	50,
+	75,
+	100,
+	125,
+	150,
+	175,
+	200,
+	250,
+	300,
+	350,
+	400,
+	450,
+	500,
+	550,
+	600,
+	650,
+	700,
+	750,
+	800,
+	850,
+	900,
+	950,
+	1000,
+	1050,
+	1100,
+	1150,
+	1200,
+	1250,
+	1300,
+	1350,
+	1400
+};
+/* codebook/dlsp7.txt */
+static float codes16[] =
+{
+	25,
+	50,
+	75,
+	100,
+	125,
+	150,
+	175,
+	200,
+	225,
+	250,
+	275,
+	300,
+	325,
+	350,
+	375,
+	400,
+	425,
+	450,
+	475,
+	500,
+	525,
+	550,
+	575,
+	600,
+	625,
+	650,
+	675,
+	700,
+	725,
+	750,
+	775,
+	800
+};
+/* codebook/dlsp8.txt */
+static float codes17[] =
+{
+	25,
+	50,
+	75,
+	100,
+	125,
+	150,
+	175,
+	200,
+	225,
+	250,
+	275,
+	300,
+	325,
+	350,
+	375,
+	400,
+	425,
+	450,
+	475,
+	500,
+	525,
+	550,
+	575,
+	600,
+	625,
+	650,
+	675,
+	700,
+	725,
+	750,
+	775,
+	800
+};
+/* codebook/dlsp9.txt */
+static float codes18[] =
+{
+	25,
+	50,
+	75,
+	100,
+	125,
+	150,
+	175,
+	200,
+	225,
+	250,
+	275,
+	300,
+	325,
+	350,
+	375,
+	400,
+	425,
+	450,
+	475,
+	500,
+	525,
+	550,
+	575,
+	600,
+	625,
+	650,
+	675,
+	700,
+	725,
+	750,
+	775,
+	800
+};
+/* codebook/dlsp10.txt */
+static float codes19[] =
+{
+	25,
+	50,
+	75,
+	100,
+	125,
+	150,
+	175,
+	200,
+	225,
+	250,
+	275,
+	300,
+	325,
+	350,
+	375,
+	400,
+	425,
+	450,
+	475,
+	500,
+	525,
+	550,
+	575,
+	600,
+	625,
+	650,
+	675,
+	700,
+	725,
+	750,
+	775,
+	800
+};
+
+const struct lsp_codebook lsp_cbd[] =
+{
+	/* codebook/dlsp1.txt */
+	{
+		1,
+		5,
+		32,
+		codes10
+	},
+	/* codebook/dlsp2.txt */
+	{
+		1,
+		5,
+		32,
+		codes11
+	},
+	/* codebook/dlsp3.txt */
+	{
+		1,
+		5,
+		32,
+		codes12
+	},
+	/* codebook/dlsp4.txt */
+	{
+		1,
+		5,
+		32,
+		codes13
+	},
+	/* codebook/dlsp5.txt */
+	{
+		1,
+		5,
+		32,
+		codes14
+	},
+	/* codebook/dlsp6.txt */
+	{
+		1,
+		5,
+		32,
+		codes15
+	},
+	/* codebook/dlsp7.txt */
+	{
+		1,
+		5,
+		32,
+		codes16
+	},
+	/* codebook/dlsp8.txt */
+	{
+		1,
+		5,
+		32,
+		codes17
+	},
+	/* codebook/dlsp9.txt */
+	{
+		1,
+		5,
+		32,
+		codes18
+	},
+	/* codebook/dlsp10.txt */
+	{
+		1,
+		5,
+		32,
+		codes19
+	},
+	{ 0, 0, 0, 0 }
+};
+
+
+/* codebook/gecb.txt */
+static float codes30[] =
+{
+	2.71,  12.0184,
+	0.04675,  -2.73881,
+	0.120993,  8.38895,
+	-1.58028,  -0.892307,
+	1.19307,  -1.91561,
+	0.187101,  -3.27679,
+	0.332251,  -7.66455,
+	-1.47944,  31.2461,
+	1.52761,  27.7095,
+	-0.524379,  5.25012,
+	0.55333,  7.4388,
+	-0.843451,  -1.95299,
+	2.26389,  8.61029,
+	0.143143,  2.36549,
+	0.616506,  1.28427,
+	-1.71133,  22.0967,
+	1.00813,  17.3965,
+	-0.106718,  1.41891,
+	-0.136246,  14.2736,
+	-1.70909,  -20.5319,
+	1.65787,  -3.39107,
+	0.138049,  -4.95785,
+	0.536729,  -1.94375,
+	0.196307,  36.8519,
+	1.27248,  22.5565,
+	-0.670219,  -1.90604,
+	0.382092,  6.40113,
+	-0.756911,  -4.90102,
+	1.82931,  4.6138,
+	0.318794,  0.73683,
+	0.612815,  -2.07505,
+	-0.410151,  24.7871,
+	1.77602,  13.1909,
+	0.106457,  -0.104492,
+	0.192206,  10.1838,
+	-1.82442,  -7.71565,
+	0.931346,  4.34835,
+	0.308813,  -4.086,
+	0.397143,  -11.8089,
+	-0.048715,  41.2273,
+	0.877342,  35.8503,
+	-0.759794,  0.476634,
+	0.978593,  7.67467,
+	-1.19506,  3.03883,
+	2.63989,  -3.41106,
+	0.191127,  3.60351,
+	0.402932,  1.0843,
+	-2.15202,  18.1076,
+	1.5468,  8.32271,
+	-0.143089,  -4.07592,
+	-0.150142,  5.86674,
+	-1.40844,  -3.2507,
+	1.56615,  -10.4132,
+	0.178171,  -10.2267,
+	0.362164,  -0.028556,
+	-0.070125,  24.3907,
+	0.594752,  17.4828,
+	-0.28698,  -6.90407,
+	0.464818,  10.2055,
+	-1.00684,  -14.3572,
+	2.32957,  -3.69161,
+	0.335745,  2.40714,
+	1.01966,  -3.15565,
+	-1.25945,  7.9919,
+	2.38369,  19.6806,
+	-0.094947,  -2.41374,
+	0.20933,  6.66477,
+	-2.22103,  1.37986,
+	1.29239,  2.04633,
+	0.243626,  -0.890741,
+	0.428773,  -7.19366,
+	-1.11374,  41.3414,
+	2.6098,  31.1405,
+	-0.446468,  2.53419,
+	0.490104,  4.62757,
+	-1.11723,  -3.24174,
+	1.79156,  8.41493,
+	0.156012,  0.183336,
+	0.532447,  3.15455,
+	-0.764484,  18.514,
+	0.952395,  11.7713,
+	-0.332567,  0.346987,
+	0.202165,  14.7168,
+	-2.12924,  -15.559,
+	1.35358,  -1.92679,
+	-0.010963,  -16.3364,
+	0.399053,  -2.79057,
+	0.750657,  31.1483,
+	0.655743,  24.4819,
+	-0.45321,  -0.735879,
+	0.2869,  6.5467,
+	-0.715673,  -12.3578,
+	1.54849,  3.87217,
+	0.271874,  0.802339,
+	0.502073,  -4.85485,
+	-0.497037,  17.7619,
+	1.19116,  13.9544,
+	0.01563,  1.33157,
+	0.341867,  8.93537,
+	-2.31601,  -5.39506,
+	0.75861,  1.9645,
+	0.24132,  -3.23769,
+	0.267151,  -11.2344,
+	-0.273126,  32.6248,
+	1.75352,  40.432,
+	-0.784011,  3.04576,
+	0.705987,  5.66118,
+	-1.3864,  1.35356,
+	2.37646,  1.67485,
+	0.242973,  4.73218,
+	0.491227,  0.354061,
+	-1.60676,  8.65895,
+	1.16711,  5.9871,
+	-0.137601,  -12.0417,
+	-0.251375,  10.3972,
+	-1.43151,  -8.90411,
+	0.98828,  -13.209,
+	0.261484,  -6.35497,
+	0.395932,  -0.702529,
+	0.283704,  26.8996,
+	0.420959,  15.4418,
+	-0.355804,  -13.7278,
+	0.527372,  12.3985,
+	-1.16956,  -15.9985,
+	1.90669,  -5.81605,
+	0.354492,  3.85157,
+	0.82576,  -4.16264,
+	-0.49019,  13.0572,
+	2.25577,  13.5264,
+	-0.004956,  -3.23713,
+	0.026709,  7.86645,
+	-1.81037,  -0.451183,
+	1.08383,  -0.18362,
+	0.135836,  -2.26658,
+	0.375812,  -5.51225,
+	-1.96644,  38.6829,
+	1.97799,  24.5655,
+	-0.704656,  6.35881,
+	0.480786,  7.05175,
+	-0.976417,  -2.42273,
+	2.50215,  6.75935,
+	0.083588,  3.2588,
+	0.543629,  0.910013,
+	-1.23196,  23.0915,
+	0.785492,  14.807,
+	-0.213554,  1.688,
+	0.004748,  18.1718,
+	-1.54719,  -16.1168,
+	1.50104,  -3.28114,
+	0.080133,  -4.63472,
+	0.476592,  -2.18093,
+	0.44247,  40.304,
+	1.07277,  27.592,
+	-0.594738,  -4.16681,
+	0.42248,  7.61609,
+	-0.927521,  -7.27441,
+	1.99162,  1.29636,
+	0.291307,  2.39878,
+	0.721081,  -1.95062,
+	-0.804256,  24.9295,
+	1.64839,  19.1197,
+	0.060852,  -0.590639,
+	0.266085,  9.10325,
+	-1.9574,  -2.88461,
+	1.11693,  2.6724,
+	0.35458,  -2.74854,
+	0.330733,  -14.1561,
+	-0.527851,  39.5756,
+	0.991152,  43.195,
+	-0.589619,  1.26919,
+	0.787401,  8.73071,
+	-1.0138,  1.02507,
+	2.8254,  1.89538,
+	0.24089,  2.74557,
+	0.427195,  2.54446,
+	-1.95311,  12.244,
+	1.44862,  12.0607,
+	-0.210492,  -3.37906,
+	-0.056713,  10.204,
+	-1.65237,  -5.10274,
+	1.29475,  -12.2708,
+	0.111608,  -8.67592,
+	0.326634,  -1.16763,
+	0.021781,  31.1258,
+	0.455335,  21.4684,
+	-0.37544,  -3.37121,
+	0.39362,  11.302,
+	-0.851456,  -19.4149,
+	2.10703,  -2.22886,
+	0.373233,  1.92406,
+	0.884438,  -1.72058,
+	-0.975127,  9.84013,
+	2.0033,  17.3954,
+	-0.036915,  -1.11137,
+	0.148456,  5.39997,
+	-1.91441,  4.77382,
+	1.44791,  0.537122,
+	0.194979,  -1.03818,
+	0.495771,  -9.95502,
+	-1.05899,  32.9471,
+	2.01122,  32.4544,
+	-0.30965,  4.71911,
+	0.436082,  4.63552,
+	-1.23711,  -1.25428,
+	2.02274,  9.42834,
+	0.190342,  1.46077,
+	0.479017,  2.48479,
+	-1.07848,  16.2217,
+	1.20764,  9.65421,
+	-0.258087,  -1.67236,
+	0.071852,  13.416,
+	-1.87723,  -16.072,
+	1.28957,  -4.87118,
+	0.067713,  -13.4427,
+	0.435551,  -4.1655,
+	0.46614,  30.5895,
+	0.904895,  21.598,
+	-0.518369,  -2.53205,
+	0.337363,  5.63726,
+	-0.554975,  -17.4005,
+	1.69188,  1.14574,
+	0.227934,  0.889297,
+	0.587303,  -5.72973,
+	-0.262133,  18.6666,
+	1.39505,  17.0029,
+	-0.01909,  4.30838,
+	0.304235,  12.6699,
+	-2.07406,  -6.46084,
+	0.920546,  1.21296,
+	0.284927,  -1.78547,
+	0.209724,  -16.024,
+	-0.636067,  31.5768,
+	1.34989,  34.6775,
+	-0.971625,  5.30086,
+	0.590249,  4.44971,
+	-1.56787,  3.60239,
+	2.1455,  4.51666,
+	0.296022,  4.12017,
+	0.445299,  0.868772,
+	-1.44193,  14.1284,
+	1.35575,  6.0074,
+	-0.012814,  -7.49657,
+	-0.43,  8.50012,
+	-1.20469,  -7.11326,
+	1.10102,  -6.83682,
+	0.196463,  -6.234,
+	0.436747,  -1.12979,
+	0.141052,  22.8549,
+	0.290821,  18.8114,
+	-0.529536,  -7.73251,
+	0.63428,  10.7898,
+	-1.33472,  -20.3258,
+	1.81564,  -1.90332,
+	0.394778,  3.79758,
+	0.732682,  -8.18382,
+	-0.741244,  11.7683
+};
+
+const struct lsp_codebook ge_cb[] =
+{
+	/* codebook/gecb.txt */
+	{
+		2,
+		8,
+		256,
+		codes30
+	},
+	{ 0, 0, 0, 0 }
+};
diff --git a/codec2/codec2.cpp b/codec2/codec2.cpp
new file mode 100644
index 0000000..4774b0c
--- /dev/null
+++ b/codec2/codec2.cpp
@@ -0,0 +1,1732 @@
+/*---------------------------------------------------------------------------*\
+
+  FILE........: codec2.c
+  AUTHOR......: David Rowe
+  DATE CREATED: 21/8/2010
+
+  Codec2 fully quantised encoder and decoder functions.  If you want use
+  codec2, the codec2_xxx functions are for you.
+
+\*---------------------------------------------------------------------------*/
+
+/*
+  Copyright (C) 2010 David Rowe
+
+  All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License version 2.1, as
+  published by the free Software Foundation.  This program is
+  distributed in the hope that it will be useful, but WITHOUT ANY
+  WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+  License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <math.h>
+
+#include "nlp.h"
+#include "lpc.h"
+#include "quantise.h"
+#include "codec2.h"
+#include "codec2_internal.h"
+
+#define HPF_BETA 0.125
+#define BPF_N 101
+
+CKissFFT kiss;
+
+/*---------------------------------------------------------------------------* \
+
+                             FUNCTION HEADERS
+
+\*---------------------------------------------------------------------------*/
+
+
+
+
+/*---------------------------------------------------------------------------*\
+
+                                FUNCTIONS
+
+\*---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: codec2_create
+  AUTHOR......: David Rowe
+  DATE CREATED: 21/8/2010
+
+  Create and initialise an instance of the codec.  Returns a pointer
+  to the codec states or NULL on failure.  One set of states is
+  sufficient for a full duuplex codec (i.e. an encoder and decoder).
+  You don't need separate states for encoders and decoders.  See
+  c2enc.c and c2dec.c for examples.
+
+\*---------------------------------------------------------------------------*/
+
+CCodec2::CCodec2(bool is_3200)
+{
+	c2.mode = is_3200 ? 3200 : 1600;
+
+	/* store constants in a few places for convenience */
+
+	c2.c2const = c2const_create(8000, N_S);
+	c2.Fs = c2.c2const.Fs;
+	int n_samp = c2.n_samp = c2.c2const.n_samp;
+	int m_pitch = c2.m_pitch = c2.c2const.m_pitch;
+
+	c2.Pn.resize(2*n_samp);
+	c2.Sn_.resize(2*n_samp);
+	c2.w.resize(m_pitch);
+	c2.Sn.resize(m_pitch);
+
+	for(int i=0; i<m_pitch; i++)
+		c2.Sn[i] = 1.0;
+	c2.hpf_states[0] = c2.hpf_states[1] = 0.0;
+	for(int i=0; i<2*n_samp; i++)
+		c2.Sn_[i] = 0;
+	kiss.fft_alloc(c2.fft_fwd_cfg, FFT_ENC, false);
+	kiss.fftr_alloc(c2.fftr_fwd_cfg, FFT_ENC, false);
+	make_analysis_window(&c2.c2const, &c2.fft_fwd_cfg, c2.w.data(), c2.W);
+	make_synthesis_window(&c2.c2const, c2.Pn.data());
+	kiss.fftr_alloc(c2.fftr_inv_cfg, FFT_DEC, true);
+	c2.prev_f0_enc = 1/P_MAX_S;
+	c2.bg_est = 0.0;
+	c2.ex_phase = 0.0;
+
+	for(int l=1; l<=MAX_AMP; l++)
+		c2.prev_model_dec.A[l] = 0.0;
+	c2.prev_model_dec.Wo = TWO_PI/c2.c2const.p_max;
+	c2.prev_model_dec.L = PI/c2.prev_model_dec.Wo;
+	c2.prev_model_dec.voiced = 0;
+
+	for(int i=0; i<LPC_ORD; i++)
+	{
+		c2.prev_lsps_dec[i] = i*PI/(LPC_ORD+1);
+	}
+	c2.prev_e_dec = 1;
+
+	nlp.nlp_create(&c2.c2const);
+
+	c2.lpc_pf = 1;
+	c2.bass_boost = 1;
+	c2.beta = LPCPF_BETA;
+	c2.gamma = LPCPF_GAMMA;
+
+	c2.xq_enc[0] = c2.xq_enc[1] = 0.0;
+	c2.xq_dec[0] = c2.xq_dec[1] = 0.0;
+
+	c2.smoothing = 0;
+
+	c2.bpf_buf.resize(BPF_N+4*c2.n_samp);
+	for(int i=0; i<BPF_N+4*c2.n_samp; i++)
+		c2.bpf_buf[i] = 0.0;
+
+	c2.softdec = NULL;
+	c2.gray = 1;
+
+	// make sure that one of the two decode function pointers is empty
+	// for the encode function pointer this is not required since we always set it
+	// to a meaningful value
+
+	decode = NULL;
+
+	if ( 3200 == c2.mode)
+	{
+		encode = &CCodec2::codec2_encode_3200;
+		decode = &CCodec2::codec2_decode_3200;
+	}
+	else
+	{
+		encode = &CCodec2::codec2_encode_1600;
+		decode = &CCodec2::codec2_decode_1600;
+	}
+}
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: codec2_destroy
+  AUTHOR......: David Rowe
+  DATE CREATED: 21/8/2010
+
+  Destroy an instance of the codec.
+
+\*---------------------------------------------------------------------------*/
+
+CCodec2::~CCodec2()
+{
+	c2.bpf_buf.clear();
+	nlp.nlp_destroy();
+	c2.fft_fwd_cfg.twiddles.clear();
+	c2.fftr_fwd_cfg.substate.twiddles.clear();
+	c2.fftr_fwd_cfg.tmpbuf.clear();
+	c2.fftr_fwd_cfg.super_twiddles.clear();
+	c2.fftr_inv_cfg.substate.twiddles.clear();
+	c2.fftr_inv_cfg.tmpbuf.clear();
+	c2.fftr_inv_cfg.super_twiddles.clear();
+	c2.Pn.clear();
+	c2.Sn.clear();
+	c2.w.clear();
+	c2.Sn_.clear();
+}
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: codec2_bits_per_frame
+  AUTHOR......: David Rowe
+  DATE CREATED: Nov 14 2011
+
+  Returns the number of bits per frame.
+
+\*---------------------------------------------------------------------------*/
+
+int CCodec2::codec2_bits_per_frame()
+{
+	return 64;
+}
+
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: codec2_samples_per_frame
+  AUTHOR......: David Rowe
+  DATE CREATED: Nov 14 2011
+
+  Returns the number of speech samples per frame.
+
+\*---------------------------------------------------------------------------*/
+
+int CCodec2::codec2_samples_per_frame()
+{
+	if (3200 == c2.mode)
+		return 160;
+	else
+		return 320;
+	return 0; /* shouldnt get here */
+}
+
+void CCodec2::codec2_encode(unsigned char *bits, const short *speech)
+{
+	assert(encode != NULL);
+
+	(*this.*encode)(bits, speech);
+}
+
+void CCodec2::codec2_decode(short *speech, const unsigned char *bits)
+{
+	assert(decode != NULL);
+
+	(*this.*decode)(speech, bits);
+}
+
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: codec2_encode_3200
+  AUTHOR......: David Rowe
+  DATE CREATED: 13 Sep 2012
+
+  Encodes 160 speech samples (20ms of speech) into 64 bits.
+
+  The codec2 algorithm actually operates internally on 10ms (80
+  sample) frames, so we run the encoding algorithm twice.  On the
+  first frame we just send the voicing bits.  On the second frame we
+  send all model parameters.  Compared to 2400 we use a larger number
+  of bits for the LSPs and non-VQ pitch and energy.
+
+  The bit allocation is:
+
+    Parameter                      bits/frame
+    --------------------------------------
+    Harmonic magnitudes (LSPs)     50
+    Pitch (Wo)                      7
+    Energy                          5
+    Voicing (10ms update)           2
+    TOTAL                          64
+
+\*---------------------------------------------------------------------------*/
+
+void CCodec2::codec2_encode_3200(unsigned char *bits, const short *speech)
+{
+	MODEL   model;
+	float   ak[LPC_ORD+1];
+	float   lsps[LPC_ORD];
+	float   e;
+	int     Wo_index, e_index;
+	int     lspd_indexes[LPC_ORD];
+	int     i;
+	unsigned int nbit = 0;
+
+	memset(bits, '\0', ((codec2_bits_per_frame() + 7) / 8));
+
+	/* first 10ms analysis frame - we just want voicing */
+
+	analyse_one_frame(&model, speech);
+	qt.pack(bits, &nbit, model.voiced, 1);
+
+	/* second 10ms analysis frame */
+
+	analyse_one_frame(&model, &speech[c2.n_samp]);
+	qt.pack(bits, &nbit, model.voiced, 1);
+	Wo_index = qt.encode_Wo(&c2.c2const, model.Wo, WO_BITS);
+	qt.pack(bits, &nbit, Wo_index, WO_BITS);
+
+	e = qt.speech_to_uq_lsps(lsps, ak, c2.Sn.data(), c2.w.data(), c2.m_pitch, LPC_ORD);
+	e_index = qt.encode_energy(e, E_BITS);
+	qt.pack(bits, &nbit, e_index, E_BITS);
+
+	qt.encode_lspds_scalar(lspd_indexes, lsps, LPC_ORD);
+	for(i=0; i<LSPD_SCALAR_INDEXES; i++)
+	{
+		qt.pack(bits, &nbit, lspd_indexes[i], qt.lspd_bits(i));
+	}
+	assert(nbit == (unsigned)codec2_bits_per_frame());
+}
+
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: codec2_decode_3200
+  AUTHOR......: David Rowe
+  DATE CREATED: 13 Sep 2012
+
+  Decodes a frame of 64 bits into 160 samples (20ms) of speech.
+
+\*---------------------------------------------------------------------------*/
+
+void CCodec2::codec2_decode_3200(short speech[], const unsigned char * bits)
+{
+	MODEL   model[2];
+	int     lspd_indexes[LPC_ORD];
+	float   lsps[2][LPC_ORD];
+	int     Wo_index, e_index;
+	float   e[2];
+	float   snr;
+	float   ak[2][LPC_ORD+1];
+	int     i,j;
+	unsigned int nbit = 0;
+	std::complex<float>    Aw[FFT_ENC];
+
+	/* only need to zero these out due to (unused) snr calculation */
+
+	for(i=0; i<2; i++)
+		for(j=1; j<=MAX_AMP; j++)
+			model[i].A[j] = 0.0;
+
+	/* unpack bits from channel ------------------------------------*/
+
+	/* this will partially fill the model params for the 2 x 10ms
+	   frames */
+
+	model[0].voiced = qt.unpack(bits, &nbit, 1);
+	model[1].voiced = qt.unpack(bits, &nbit, 1);
+
+	Wo_index = qt.unpack(bits, &nbit, WO_BITS);
+	model[1].Wo = qt.decode_Wo(&c2.c2const, Wo_index, WO_BITS);
+	model[1].L  = PI/model[1].Wo;
+
+	e_index = qt.unpack(bits, &nbit, E_BITS);
+	e[1] = qt.decode_energy(e_index, E_BITS);
+
+	for(i=0; i<LSPD_SCALAR_INDEXES; i++)
+	{
+		lspd_indexes[i] = qt.unpack(bits, &nbit, qt.lspd_bits(i));
+	}
+	qt.decode_lspds_scalar(&lsps[1][0], lspd_indexes, LPC_ORD);
+
+	/* interpolate ------------------------------------------------*/
+
+	/* Wo and energy are sampled every 20ms, so we interpolate just 1
+	   10ms frame between 20ms samples */
+
+	interp_Wo(&model[0], &c2.prev_model_dec, &model[1], c2.c2const.Wo_min);
+	e[0] = interp_energy(c2.prev_e_dec, e[1]);
+
+	/* LSPs are sampled every 20ms so we interpolate the frame in
+	   between, then recover spectral amplitudes */
+
+	interpolate_lsp_ver2(&lsps[0][0], c2.prev_lsps_dec, &lsps[1][0], 0.5, LPC_ORD);
+
+	for(i=0; i<2; i++)
+	{
+		lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
+		qt.aks_to_M2(&(c2.fftr_fwd_cfg), &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, c2.lpc_pf, c2.bass_boost, c2.beta, c2.gamma, Aw);
+		qt.apply_lpc_correction(&model[i]);
+		synthesise_one_frame(&speech[c2.n_samp*i], &model[i], Aw, 1.0);
+	}
+
+	/* update memories for next frame ----------------------------*/
+
+	c2.prev_model_dec = model[1];
+	c2.prev_e_dec = e[1];
+	for(i=0; i<LPC_ORD; i++)
+		c2.prev_lsps_dec[i] = lsps[1][i];
+}
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: codec2_encode_1600
+  AUTHOR......: David Rowe
+  DATE CREATED: Feb 28 2013
+
+  Encodes 320 speech samples (40ms of speech) into 64 bits.
+
+  The codec2 algorithm actually operates internally on 10ms (80
+  sample) frames, so we run the encoding algorithm 4 times:
+
+  frame 0: voicing bit
+  frame 1: voicing bit, Wo and E
+  frame 2: voicing bit
+  frame 3: voicing bit, Wo and E, scalar LSPs
+
+  The bit allocation is:
+
+    Parameter                      frame 2  frame 4   Total
+    -------------------------------------------------------
+    Harmonic magnitudes (LSPs)      0       36        36
+    Pitch (Wo)                      7        7        14
+    Energy                          5        5        10
+    Voicing (10ms update)           2        2         4
+    TOTAL                          14       50        64
+
+\*---------------------------------------------------------------------------*/
+
+void CCodec2::codec2_encode_1600(unsigned char * bits, const short speech[])
+{
+	MODEL   model;
+	float   lsps[LPC_ORD];
+	float   ak[LPC_ORD+1];
+	float   e;
+	int     lsp_indexes[LPC_ORD];
+	int     Wo_index, e_index;
+	int     i;
+	unsigned int nbit = 0;
+
+	memset(bits, '\0',  ((codec2_bits_per_frame() + 7) / 8));
+
+	/* frame 1: - voicing ---------------------------------------------*/
+
+	analyse_one_frame(&model, speech);
+	qt.pack(bits, &nbit, model.voiced, 1);
+
+	/* frame 2: - voicing, scalar Wo & E -------------------------------*/
+
+	analyse_one_frame(&model, &speech[c2.n_samp]);
+	qt.pack(bits, &nbit, model.voiced, 1);
+
+	Wo_index = qt.encode_Wo(&c2.c2const, model.Wo, WO_BITS);
+	qt.pack(bits, &nbit, Wo_index, WO_BITS);
+
+	/* need to run this just to get LPC energy */
+	e = qt.speech_to_uq_lsps(lsps, ak, c2.Sn.data(), c2.w.data(), c2.m_pitch, LPC_ORD);
+	e_index = qt.encode_energy(e, E_BITS);
+	qt.pack(bits, &nbit, e_index, E_BITS);
+
+	/* frame 3: - voicing ---------------------------------------------*/
+
+	analyse_one_frame(&model, &speech[2*c2.n_samp]);
+	qt.pack(bits, &nbit, model.voiced, 1);
+
+	/* frame 4: - voicing, scalar Wo & E, scalar LSPs ------------------*/
+
+	analyse_one_frame(&model, &speech[3*c2.n_samp]);
+	qt.pack(bits, &nbit, model.voiced, 1);
+
+	Wo_index = qt.encode_Wo(&c2.c2const, model.Wo, WO_BITS);
+	qt.pack(bits, &nbit, Wo_index, WO_BITS);
+
+	e = qt.speech_to_uq_lsps(lsps, ak, c2.Sn.data(), c2.w.data(), c2.m_pitch, LPC_ORD);
+	e_index = qt.encode_energy(e, E_BITS);
+	qt.pack(bits, &nbit, e_index, E_BITS);
+
+	qt.encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD);
+	for(i=0; i<LSP_SCALAR_INDEXES; i++)
+	{
+		qt.pack(bits, &nbit, lsp_indexes[i], qt.lsp_bits(i));
+	}
+
+	assert(nbit == (unsigned)codec2_bits_per_frame());
+}
+
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: codec2_decode_1600
+  AUTHOR......: David Rowe
+  DATE CREATED: 11 May 2012
+
+  Decodes frames of 64 bits into 320 samples (40ms) of speech.
+
+\*---------------------------------------------------------------------------*/
+
+void CCodec2::codec2_decode_1600(short speech[], const unsigned char * bits)
+{
+	MODEL   model[4];
+	int     lsp_indexes[LPC_ORD];
+	float   lsps[4][LPC_ORD];
+	int     Wo_index, e_index;
+	float   e[4];
+	float   snr;
+	float   ak[4][LPC_ORD+1];
+	int     i,j;
+	unsigned int nbit = 0;
+	float   weight;
+	std::complex<float>    Aw[FFT_ENC];
+
+	/* only need to zero these out due to (unused) snr calculation */
+
+	for(i=0; i<4; i++)
+		for(j=1; j<=MAX_AMP; j++)
+			model[i].A[j] = 0.0;
+
+	/* unpack bits from channel ------------------------------------*/
+
+	/* this will partially fill the model params for the 4 x 10ms
+	   frames */
+
+	model[0].voiced = qt.unpack(bits, &nbit, 1);
+
+	model[1].voiced = qt.unpack(bits, &nbit, 1);
+	Wo_index = qt.unpack(bits, &nbit, WO_BITS);
+	model[1].Wo = qt.decode_Wo(&c2.c2const, Wo_index, WO_BITS);
+	model[1].L  = PI/model[1].Wo;
+
+	e_index = qt.unpack(bits, &nbit, E_BITS);
+	e[1] = qt.decode_energy(e_index, E_BITS);
+
+	model[2].voiced = qt.unpack(bits, &nbit, 1);
+
+	model[3].voiced = qt.unpack(bits, &nbit, 1);
+	Wo_index = qt.unpack(bits, &nbit, WO_BITS);
+	model[3].Wo = qt.decode_Wo(&c2.c2const, Wo_index, WO_BITS);
+	model[3].L  = PI/model[3].Wo;
+
+	e_index = qt.unpack(bits, &nbit, E_BITS);
+	e[3] = qt.decode_energy(e_index, E_BITS);
+
+	for(i=0; i<LSP_SCALAR_INDEXES; i++)
+	{
+		lsp_indexes[i] = qt.unpack(bits, &nbit, qt.lsp_bits(i));
+	}
+	qt.decode_lsps_scalar(&lsps[3][0], lsp_indexes, LPC_ORD);
+	qt.check_lsp_order(&lsps[3][0], LPC_ORD);
+	qt.bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0);
+
+	/* interpolate ------------------------------------------------*/
+
+	/* Wo and energy are sampled every 20ms, so we interpolate just 1
+	   10ms frame between 20ms samples */
+
+	interp_Wo(&model[0], &c2.prev_model_dec, &model[1], c2.c2const.Wo_min);
+	e[0] = interp_energy(c2.prev_e_dec, e[1]);
+	interp_Wo(&model[2], &model[1], &model[3], c2.c2const.Wo_min);
+	e[2] = interp_energy(e[1], e[3]);
+
+	/* LSPs are sampled every 40ms so we interpolate the 3 frames in
+	   between, then recover spectral amplitudes */
+
+	for(i=0, weight=0.25; i<3; i++, weight += 0.25)
+	{
+		interpolate_lsp_ver2(&lsps[i][0], c2.prev_lsps_dec, &lsps[3][0], weight, LPC_ORD);
+	}
+	for(i=0; i<4; i++)
+	{
+		lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
+		qt.aks_to_M2(&(c2.fftr_fwd_cfg), &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, c2.lpc_pf, c2.bass_boost, c2.beta, c2.gamma, Aw);
+		qt.apply_lpc_correction(&model[i]);
+		synthesise_one_frame(&speech[c2.n_samp*i], &model[i], Aw, 1.0);
+	}
+
+	/* update memories for next frame ----------------------------*/
+
+	c2.prev_model_dec = model[3];
+	c2.prev_e_dec = e[3];
+	for(i=0; i<LPC_ORD; i++)
+		c2.prev_lsps_dec[i] = lsps[3][i];
+
+}
+
+/*---------------------------------------------------------------------------* \
+
+  FUNCTION....: synthesise_one_frame()
+  AUTHOR......: David Rowe
+  DATE CREATED: 23/8/2010
+
+  Synthesise 80 speech samples (10ms) from model parameters.
+
+\*---------------------------------------------------------------------------*/
+
+void CCodec2::synthesise_one_frame(short speech[], MODEL *model, std::complex<float> Aw[], float gain)
+{
+	int     i;
+
+	/* LPC based phase synthesis */
+	std::complex<float> H[MAX_AMP+1];
+	sample_phase(model, H, Aw);
+	phase_synth_zero_order(c2.n_samp, model, &c2.ex_phase, H);
+
+	postfilter(model, &c2.bg_est);
+	synthesise(c2.n_samp, &(c2.fftr_inv_cfg), c2.Sn_.data(), model, c2.Pn.data(), 1);
+
+	for(i=0; i<c2.n_samp; i++)
+	{
+		c2.Sn_[i] *= gain;
+	}
+
+	ear_protection(c2.Sn_.data(), c2.n_samp);
+
+	for(i=0; i<c2.n_samp; i++)
+	{
+		if (c2.Sn_[i] > 32767.0)
+			speech[i] = 32767;
+		else if (c2.Sn_[i] < -32767.0)
+			speech[i] = -32767;
+		else
+			speech[i] = c2.Sn_[i];
+	}
+
+}
+
+
+/*---------------------------------------------------------------------------* \
+
+  FUNCTION....: analyse_one_frame()
+  AUTHOR......: David Rowe
+  DATE CREATED: 23/8/2010
+
+  Extract sinusoidal model parameters from 80 speech samples (10ms of
+  speech).
+
+\*---------------------------------------------------------------------------*/
+
+void CCodec2::analyse_one_frame(MODEL *model, const short *speech)
+{
+	std::complex<float>    Sw[FFT_ENC];
+	float   pitch;
+	int     i;
+	int     n_samp = c2.n_samp;
+	int     m_pitch = c2.m_pitch;
+
+	/* Read input speech */
+
+	for(i=0; i<m_pitch-n_samp; i++)
+		c2.Sn[i] = c2.Sn[i+n_samp];
+	for(i=0; i<n_samp; i++)
+		c2.Sn[i+m_pitch-n_samp] = speech[i];
+
+	dft_speech(&c2.c2const, c2.fft_fwd_cfg, Sw, c2.Sn.data(), c2.w.data());
+
+	/* Estimate pitch */
+	nlp.nlp(c2.Sn.data(), n_samp, &pitch, &c2.prev_f0_enc);
+	model->Wo = TWO_PI/pitch;
+	model->L = PI/model->Wo;
+
+	/* estimate model parameters */
+	two_stage_pitch_refinement(&c2.c2const, model, Sw);
+
+	/* estimate phases when doing ML experiments */
+	estimate_amplitudes(model, Sw, 0);
+	est_voicing_mbe(&c2.c2const, model, Sw, c2.W);
+}
+
+
+/*---------------------------------------------------------------------------* \
+
+  FUNCTION....: ear_protection()
+  AUTHOR......: David Rowe
+  DATE CREATED: Nov 7 2012
+
+  Limits output level to protect ears when there are bit errors or the input
+  is overdriven.  This doesn't correct or mask bit errors, just reduces the
+  worst of their damage.
+
+\*---------------------------------------------------------------------------*/
+
+void CCodec2::ear_protection(float in_out[], int n)
+{
+	float max_sample, over, gain;
+	int   i;
+
+	/* find maximum sample in frame */
+
+	max_sample = 0.0;
+	for(i=0; i<n; i++)
+		if (in_out[i] > max_sample)
+			max_sample = in_out[i];
+
+	/* determine how far above set point */
+
+	over = max_sample/30000.0;
+
+	/* If we are x dB over set point we reduce level by 2x dB, this
+	   attenuates major excursions in amplitude (likely to be caused
+	   by bit errors) more than smaller ones */
+
+	if (over > 1.0)
+	{
+		gain = 1.0/(over*over);
+		for(i=0; i<n; i++)
+			in_out[i] *= gain;
+	}
+}
+
+/*---------------------------------------------------------------------------*\
+
+  sample_phase()
+
+  Samples phase at centre of each harmonic from and array of FFT_ENC
+  DFT samples.
+
+\*---------------------------------------------------------------------------*/
+
+void CCodec2::sample_phase(MODEL *model,
+				  std::complex<float> H[],
+				  std::complex<float> A[]        /* LPC analysis filter in freq domain */
+)
+{
+	int   m, b;
+	float r;
+
+	r = TWO_PI/(FFT_ENC);
+
+	/* Sample phase at harmonics */
+
+	for(m=1; m<=model->L; m++)
+	{
+		b = (int)(m*model->Wo/r + 0.5);
+		H[m] = std::conj(A[b]);
+	}
+}
+
+
+/*---------------------------------------------------------------------------*\
+
+   phase_synth_zero_order()
+
+   Synthesises phases based on SNR and a rule based approach.  No phase
+   parameters are required apart from the SNR (which can be reduced to a
+   1 bit V/UV decision per frame).
+
+   The phase of each harmonic is modelled as the phase of a synthesis
+   filter excited by an impulse.  In many Codec 2 modes the synthesis
+   filter is a LPC filter. Unlike the first order model the position
+   of the impulse is not transmitted, so we create an excitation pulse
+   train using a rule based approach.
+
+   Consider a pulse train with a pulse starting time n=0, with pulses
+   repeated at a rate of Wo, the fundamental frequency.  A pulse train
+   in the time domain is equivalent to harmonics in the frequency
+   domain.  We can make an excitation pulse train using a sum of
+   sinsusoids:
+
+     for(m=1; m<=L; m++)
+       ex[n] = cos(m*Wo*n)
+
+   Note: the Octave script ../octave/phase.m is an example of this if
+   you would like to try making a pulse train.
+
+   The phase of each excitation harmonic is:
+
+     arg(E[m]) = mWo
+
+   where E[m] are the complex excitation (freq domain) samples,
+   arg(x), just returns the phase of a complex sample x.
+
+   As we don't transmit the pulse position for this model, we need to
+   synthesise it.  Now the excitation pulses occur at a rate of Wo.
+   This means the phase of the first harmonic advances by N_SAMP samples
+   over a synthesis frame of N_SAMP samples.  For example if Wo is pi/20
+   (200 Hz), then over a 10ms frame (N_SAMP=80 samples), the phase of the
+   first harmonic would advance (pi/20)*80 = 4*pi or two complete
+   cycles.
+
+   We generate the excitation phase of the fundamental (first
+   harmonic):
+
+     arg[E[1]] = Wo*N_SAMP;
+
+   We then relate the phase of the m-th excitation harmonic to the
+   phase of the fundamental as:
+
+     arg(E[m]) = m*arg(E[1])
+
+   This E[m] then gets passed through the LPC synthesis filter to
+   determine the final harmonic phase.
+
+   Comparing to speech synthesised using original phases:
+
+   - Through headphones speech synthesised with this model is not as
+     good. Through a loudspeaker it is very close to original phases.
+
+   - If there are voicing errors, the speech can sound clicky or
+     staticy.  If V speech is mistakenly declared UV, this model tends to
+     synthesise impulses or clicks, as there is usually very little shift or
+     dispersion through the LPC synthesis filter.
+
+   - When combined with LPC amplitude modelling there is an additional
+     drop in quality.  I am not sure why, theory is interformant energy
+     is raised making any phase errors more obvious.
+
+   NOTES:
+
+     1/ This synthesis model is effectively the same as a simple LPC-10
+     vocoders, and yet sounds much better.  Why? Conventional wisdom
+     (AMBE, MELP) says mixed voicing is required for high quality
+     speech.
+
+     2/ I am pretty sure the Lincoln Lab sinusoidal coding guys (like xMBE
+     also from MIT) first described this zero phase model, I need to look
+     up the paper.
+
+     3/ Note that this approach could cause some discontinuities in
+     the phase at the edge of synthesis frames, as no attempt is made
+     to make sure that the phase tracks are continuous (the excitation
+     phases are continuous, but not the final phases after filtering
+     by the LPC spectra).  Technically this is a bad thing.  However
+     this may actually be a good thing, disturbing the phase tracks a
+     bit.  More research needed, e.g. test a synthesis model that adds
+     a small delta-W to make phase tracks line up for voiced
+     harmonics.
+
+\*---------------------------------------------------------------------------*/
+
+void CCodec2::phase_synth_zero_order(
+	int    n_samp,
+	MODEL *model,
+	float *ex_phase,            /* excitation phase of fundamental        */
+	std::complex<float>   H[]                  /* L synthesis filter freq domain samples */
+
+)
+{
+	int   m;
+	float new_phi;
+	std::complex<float>  Ex[MAX_AMP+1];	  /* excitation samples */
+	std::complex<float>  A_[MAX_AMP+1];	  /* synthesised harmonic samples */
+
+	/*
+	   Update excitation fundamental phase track, this sets the position
+	   of each pitch pulse during voiced speech.  After much experiment
+	   I found that using just this frame's Wo improved quality for UV
+	   sounds compared to interpolating two frames Wo like this:
+
+	   ex_phase[0] += (*prev_Wo+model->Wo)*N_SAMP/2;
+	*/
+
+	ex_phase[0] += (model->Wo)*n_samp;
+	ex_phase[0] -= TWO_PI*floorf(ex_phase[0]/TWO_PI + 0.5);
+
+	for(m=1; m<=model->L; m++)
+	{
+
+		/* generate excitation */
+
+		if (model->voiced)
+		{
+			Ex[m] = std::polar(1.0f, ex_phase[0] * m);
+		}
+		else
+		{
+
+			/* When a few samples were tested I found that LPC filter
+			   phase is not needed in the unvoiced case, but no harm in
+			   keeping it.
+			*/
+			float phi = TWO_PI*(float)codec2_rand()/CODEC2_RAND_MAX;
+			Ex[m] = std::polar(1.0f, phi);
+		}
+
+		/* filter using LPC filter */
+
+		A_[m].real(H[m].real() * Ex[m].real() - H[m].imag() * Ex[m].imag());
+		A_[m].imag(H[m].imag() * Ex[m].real() + H[m].real() * Ex[m].imag());
+
+		/* modify sinusoidal phase */
+
+		new_phi = atan2f(A_[m].imag(), A_[m].real()+1E-12);
+		model->phi[m] = new_phi;
+	}
+
+}
+
+/*---------------------------------------------------------------------------*\
+
+  postfilter()
+
+  The post filter is designed to help with speech corrupted by
+  background noise.  The zero phase model tends to make speech with
+  background noise sound "clicky".  With high levels of background
+  noise the low level inter-formant parts of the spectrum will contain
+  noise rather than speech harmonics, so modelling them as voiced
+  (i.e. a continuous, non-random phase track) is inaccurate.
+
+  Some codecs (like MBE) have a mixed voicing model that breaks the
+  spectrum into voiced and unvoiced regions.  Several bits/frame
+  (5-12) are required to transmit the frequency selective voicing
+  information.  Mixed excitation also requires accurate voicing
+  estimation (parameter estimators always break occasionally under
+  exceptional conditions).
+
+  In our case we use a post filter approach which requires no
+  additional bits to be transmitted.  The decoder measures the average
+  level of the background noise during unvoiced frames.  If a harmonic
+  is less than this level it is made unvoiced by randomising it's
+  phases.
+
+  This idea is rather experimental.  Some potential problems that may
+  happen:
+
+  1/ If someone says "aaaaaaaahhhhhhhhh" will background estimator track
+     up to speech level?  This would be a bad thing.
+
+  2/ If background noise suddenly dissapears from the source speech does
+     estimate drop quickly?  What is noise suddenly re-appears?
+
+  3/ Background noise with a non-flat sepctrum.  Current algorithm just
+     comsiders spectrum as a whole, but this could be broken up into
+     bands, each with their own estimator.
+
+  4/ Males and females with the same level of background noise.  Check
+     performance the same.  Changing Wo affects width of each band, may
+     affect bg energy estimates.
+
+  5/ Not sure what happens during long periods of voiced speech
+     e.g. "sshhhhhhh"
+
+\*---------------------------------------------------------------------------*/
+
+#define BG_THRESH 40.0	// only consider low levels signals for bg_est
+#define BG_BETA    0.1	// averaging filter constant
+#define BG_MARGIN  6.0	// harmonics this far above BG noise are
+                        // randomised.  Helped make bg noise less
+			            // spikey (impulsive) for mmt1, but speech was
+                        // perhaps a little rougher.
+
+void CCodec2::postfilter( MODEL *model, float *bg_est )
+{
+	int   m, uv;
+	float e, thresh;
+
+	/* determine average energy across spectrum */
+
+	e = 1E-12;
+	for(m=1; m<=model->L; m++)
+		e += model->A[m]*model->A[m];
+
+	assert(e > 0.0);
+	e = 10.0*log10f(e/model->L);
+
+	/* If beneath threhold, update bg estimate.  The idea
+	   of the threshold is to prevent updating during high level
+	   speech. */
+
+	if ((e < BG_THRESH) && !model->voiced)
+		*bg_est =  *bg_est*(1.0 - BG_BETA) + e*BG_BETA;
+
+	/* now mess with phases during voiced frames to make any harmonics
+	   less then our background estimate unvoiced.
+	*/
+
+	uv = 0;
+	thresh = exp10f((*bg_est + BG_MARGIN)/20.0);
+	if (model->voiced)
+		for(m=1; m<=model->L; m++)
+			if (model->A[m] < thresh)
+			{
+				model->phi[m] = (TWO_PI/CODEC2_RAND_MAX)*(float)codec2_rand();
+				uv++;
+			}
+}
+
+C2CONST CCodec2::c2const_create(int Fs, float framelength_s)
+{
+	C2CONST c2const;
+
+	assert((Fs == 8000) || (Fs = 16000));
+	c2const.Fs = Fs;
+	c2const.n_samp = round(Fs*framelength_s);
+	c2const.max_amp = floor(Fs*P_MAX_S/2);
+	c2const.p_min = floor(Fs*P_MIN_S);
+	c2const.p_max = floor(Fs*P_MAX_S);
+	c2const.m_pitch = floor(Fs*M_PITCH_S);
+	c2const.Wo_min = TWO_PI/c2const.p_max;
+	c2const.Wo_max = TWO_PI/c2const.p_min;
+
+	if (Fs == 8000)
+	{
+		c2const.nw = 279;
+	}
+	else
+	{
+		c2const.nw = 511;  /* actually a bit shorter in time but lets us maintain constant FFT size */
+	}
+
+	c2const.tw = Fs*TW_S;
+
+	/*
+	fprintf(stderr, "max_amp: %d m_pitch: %d\n", c2const.n_samp, c2const.m_pitch);
+	fprintf(stderr, "p_min: %d p_max: %d\n", c2const.p_min, c2const.p_max);
+	fprintf(stderr, "Wo_min: %f Wo_max: %f\n", c2const.Wo_min, c2const.Wo_max);
+	fprintf(stderr, "nw: %d tw: %d\n", c2const.nw, c2const.tw);
+	*/
+
+	return c2const;
+}
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: make_analysis_window
+  AUTHOR......: David Rowe
+  DATE CREATED: 11/5/94
+
+  Init function that generates the time domain analysis window and it's DFT.
+
+\*---------------------------------------------------------------------------*/
+
+void CCodec2::make_analysis_window(C2CONST *c2const, FFT_STATE *fft_fwd_cfg, float w[], float W[])
+{
+	float m;
+	std::complex<float>  wshift[FFT_ENC];
+	int   i,j;
+	int   m_pitch = c2const->m_pitch;
+	int   nw      = c2const->nw;
+
+	/*
+	   Generate Hamming window centered on M-sample pitch analysis window
+
+	0            M/2           M-1
+	|-------------|-------------|
+	      |-------|-------|
+	          nw samples
+
+	   All our analysis/synthsis is centred on the M/2 sample.
+	*/
+
+	m = 0.0;
+	for(i=0; i<m_pitch/2-nw/2; i++)
+		w[i] = 0.0;
+	for(i=m_pitch/2-nw/2,j=0; i<m_pitch/2+nw/2; i++,j++)
+	{
+		w[i] = 0.5 - 0.5*cosf(TWO_PI*j/(nw-1));
+		m += w[i]*w[i];
+	}
+	for(i=m_pitch/2+nw/2; i<m_pitch; i++)
+		w[i] = 0.0;
+
+	/* Normalise - makes freq domain amplitude estimation straight
+	   forward */
+
+	m = 1.0/sqrtf(m*FFT_ENC);
+	for(i=0; i<m_pitch; i++)
+	{
+		w[i] *= m;
+	}
+
+	/*
+	   Generate DFT of analysis window, used for later processing.  Note
+	   we modulo FFT_ENC shift the time domain window w[], this makes the
+	   imaginary part of the DFT W[] equal to zero as the shifted w[] is
+	   even about the n=0 time axis if nw is odd.  Having the imag part
+	   of the DFT W[] makes computation easier.
+
+	   0                      FFT_ENC-1
+	   |-------------------------|
+
+	    ----\               /----
+	         \             /
+	          \           /          <- shifted version of window w[n]
+	           \         /
+	            \       /
+	             -------
+
+	   |---------|     |---------|
+	     nw/2              nw/2
+	*/
+
+	std::complex<float> temp[FFT_ENC];
+
+	for(i=0; i<FFT_ENC; i++)
+	{
+		wshift[i] = std::complex<float>(0.0f, 0.0f);
+	}
+	for(i=0; i<nw/2; i++)
+		wshift[i].real(w[i+m_pitch/2]);
+	for(i=FFT_ENC-nw/2,j=m_pitch/2-nw/2; i<FFT_ENC; i++,j++)
+		wshift[i].real(w[j]);
+
+	kiss.fft(*fft_fwd_cfg, wshift, temp);
+
+	/*
+	    Re-arrange W[] to be symmetrical about FFT_ENC/2.  Makes later
+	    analysis convenient.
+
+	 Before:
+
+
+	   0                 FFT_ENC-1
+	   |----------|---------|
+	   __                   _
+	     \                 /
+	      \_______________/
+
+	 After:
+
+	   0                 FFT_ENC-1
+	   |----------|---------|
+	             ___
+	            /   \
+	   ________/     \_______
+
+	*/
+
+
+	for(i=0; i<FFT_ENC/2; i++)
+	{
+		W[i] = temp[i + FFT_ENC / 2].real();
+		W[i + FFT_ENC / 2] = temp[i].real();
+	}
+
+}
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: dft_speech
+  AUTHOR......: David Rowe
+  DATE CREATED: 27/5/94
+
+  Finds the DFT of the current speech input speech frame.
+
+\*---------------------------------------------------------------------------*/
+
+void CCodec2::dft_speech(C2CONST *c2const, FFT_STATE &fft_fwd_cfg, std::complex<float> Sw[], float Sn[], float w[])
+{
+    int  i;
+    int  m_pitch = c2const->m_pitch;
+    int   nw      = c2const->nw;
+
+    for(i=0; i<FFT_ENC; i++) {
+		Sw[i] = std::complex<float>(0.0f, 0.0f);
+    }
+
+    /* Centre analysis window on time axis, we need to arrange input
+       to FFT this way to make FFT phases correct */
+
+    /* move 2nd half to start of FFT input vector */
+
+    for(i=0; i<nw/2; i++)
+        Sw[i].real(Sn[i+m_pitch/2]*w[i+m_pitch/2]);
+
+    /* move 1st half to end of FFT input vector */
+
+    for(i=0; i<nw/2; i++)
+        Sw[FFT_ENC-nw/2+i].real(Sn[i+m_pitch/2-nw/2]*w[i+m_pitch/2-nw/2]);
+
+    nlp.codec2_fft_inplace(fft_fwd_cfg, Sw);
+}
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: two_stage_pitch_refinement
+  AUTHOR......: David Rowe
+  DATE CREATED: 27/5/94
+
+  Refines the current pitch estimate using the harmonic sum pitch
+  estimation technique.
+
+\*---------------------------------------------------------------------------*/
+
+void CCodec2::two_stage_pitch_refinement(C2CONST *c2const, MODEL *model, std::complex<float> Sw[])
+{
+	float pmin,pmax,pstep;	/* pitch refinment minimum, maximum and step */
+
+	/* Coarse refinement */
+
+	pmax = TWO_PI/model->Wo + 5;
+	pmin = TWO_PI/model->Wo - 5;
+	pstep = 1.0;
+	hs_pitch_refinement(model, Sw, pmin, pmax, pstep);
+
+	/* Fine refinement */
+
+	pmax = TWO_PI/model->Wo + 1;
+	pmin = TWO_PI/model->Wo - 1;
+	pstep = 0.25;
+	hs_pitch_refinement(model,Sw,pmin,pmax,pstep);
+
+	/* Limit range */
+
+	if (model->Wo < TWO_PI/c2const->p_max)
+		model->Wo = TWO_PI/c2const->p_max;
+	if (model->Wo > TWO_PI/c2const->p_min)
+		model->Wo = TWO_PI/c2const->p_min;
+
+	model->L = floorf(PI/model->Wo);
+
+	/* trap occasional round off issues with floorf() */
+	if (model->Wo*model->L >= 0.95*PI)
+	{
+		model->L--;
+	}
+	assert(model->Wo*model->L < PI);
+}
+
+/*---------------------------------------------------------------------------*\
+
+ FUNCTION....: hs_pitch_refinement
+ AUTHOR......: David Rowe
+ DATE CREATED: 27/5/94
+
+ Harmonic sum pitch refinement function.
+
+ pmin   pitch search range minimum
+ pmax	pitch search range maximum
+ step   pitch search step size
+ model	current pitch estimate in model.Wo
+
+ model 	refined pitch estimate in model.Wo
+
+\*---------------------------------------------------------------------------*/
+
+void CCodec2::hs_pitch_refinement(MODEL *model, std::complex<float> Sw[], float pmin, float pmax, float pstep)
+{
+	int m;		/* loop variable */
+	int b;		/* bin for current harmonic centre */
+	float E;		/* energy for current pitch*/
+	float Wo;		/* current "test" fundamental freq. */
+	float Wom;		/* Wo that maximises E */
+	float Em;		/* mamimum energy */
+	float r, one_on_r;	/* number of rads/bin */
+	float p;		/* current pitch */
+
+	/* Initialisation */
+
+	model->L = PI/model->Wo;	/* use initial pitch est. for L */
+	Wom = model->Wo;
+	Em = 0.0;
+	r = TWO_PI/FFT_ENC;
+	one_on_r = 1.0/r;
+
+	/* Determine harmonic sum for a range of Wo values */
+
+	for(p=pmin; p<=pmax; p+=pstep)
+	{
+		E = 0.0;
+		Wo = TWO_PI/p;
+
+		/* Sum harmonic magnitudes */
+		for(m=1; m<=model->L; m++)
+		{
+			b = (int)(m*Wo*one_on_r + 0.5);
+			E += Sw[b].real() * Sw[b].real() + Sw[b].imag() * Sw[b].imag();
+		}
+		/* Compare to see if this is a maximum */
+
+		if (E > Em)
+		{
+			Em = E;
+			Wom = Wo;
+		}
+	}
+
+	model->Wo = Wom;
+}
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: estimate_amplitudes
+  AUTHOR......: David Rowe
+  DATE CREATED: 27/5/94
+
+  Estimates the complex amplitudes of the harmonics.
+
+\*---------------------------------------------------------------------------*/
+
+void CCodec2::estimate_amplitudes(MODEL *model, std::complex<float> Sw[], int est_phase)
+{
+	int   i,m;		/* loop variables */
+	int   am,bm;		/* bounds of current harmonic */
+	float den;		/* denominator of amplitude expression */
+
+	float r = TWO_PI/FFT_ENC;
+	float one_on_r = 1.0/r;
+
+	for(m=1; m<=model->L; m++)
+	{
+		/* Estimate ampltude of harmonic */
+
+		den = 0.0;
+		am = (int)((m - 0.5)*model->Wo*one_on_r + 0.5);
+		bm = (int)((m + 0.5)*model->Wo*one_on_r + 0.5);
+
+		for(i=am; i<bm; i++)
+		{
+			den += Sw[i].real() * Sw[i].real() + Sw[i].imag() * Sw[i].imag();
+		}
+
+		model->A[m] = sqrtf(den);
+
+		if (est_phase)
+		{
+			int b = (int)(m*model->Wo/r + 0.5); /* DFT bin of centre of current harmonic */
+
+			/* Estimate phase of harmonic, this is expensive in CPU for
+			   embedded devicesso we make it an option */
+
+			model->phi[m] = atan2f(Sw[b].imag(), Sw[b].real());
+		}
+	}
+}
+
+/*---------------------------------------------------------------------------*\
+
+  est_voicing_mbe()
+
+  Returns the error of the MBE cost function for a fiven F0.
+
+  Note: I think a lot of the operations below can be simplified as
+  W[].imag = 0 and has been normalised such that den always equals 1.
+
+\*---------------------------------------------------------------------------*/
+
+float CCodec2::est_voicing_mbe( C2CONST *c2const, MODEL *model, std::complex<float> Sw[], float  W[])
+{
+	int   l,al,bl,m;    /* loop variables */
+	std::complex<float>  Am;             /* amplitude sample for this band */
+	int   offset;         /* centers Hw[] about current harmonic */
+	float den;            /* denominator of Am expression */
+	float error;          /* accumulated error between original and synthesised */
+	float Wo;
+	float sig, snr;
+	float elow, ehigh, eratio;
+	float sixty;
+	std::complex<float> Ew(0, 0);
+
+	int l_1000hz = model->L*1000.0/(c2const->Fs/2);
+	sig = 1E-4;
+	for(l=1; l<=l_1000hz; l++)
+	{
+		sig += model->A[l]*model->A[l];
+	}
+
+	Wo = model->Wo;
+	error = 1E-4;
+
+	/* Just test across the harmonics in the first 1000 Hz */
+
+	for(l=1; l<=l_1000hz; l++)
+	{
+		Am = std::complex<float>(0.0f, 0.0f);
+		den = 0.0;
+		al = ceilf((l - 0.5)*Wo*FFT_ENC/TWO_PI);
+		bl = ceilf((l + 0.5)*Wo*FFT_ENC/TWO_PI);
+
+		/* Estimate amplitude of harmonic assuming harmonic is totally voiced */
+
+		offset = FFT_ENC/2 - l*Wo*FFT_ENC/TWO_PI + 0.5;
+		for(m=al; m<bl; m++)
+		{
+			Am += W[offset+m] * Sw[m];
+			den += W[offset+m]*W[offset+m];
+		}
+
+		Am /= den;
+
+		/* Determine error between estimated harmonic and original */
+
+		for(m=al; m<bl; m++)
+		{
+			Ew = Sw[m] - (W[offset+m] * Am);
+			error += Ew.real() * Ew.real() + Ew.imag() * Ew.imag();
+		}
+	}
+
+	snr = 10.0*log10f(sig/error);
+	if (snr > V_THRESH)
+		model->voiced = 1;
+	else
+		model->voiced = 0;
+
+	/* post processing, helps clean up some voicing errors ------------------*/
+
+	/*
+	   Determine the ratio of low freqency to high frequency energy,
+	   voiced speech tends to be dominated by low frequency energy,
+	   unvoiced by high frequency. This measure can be used to
+	   determine if we have made any gross errors.
+	*/
+
+	int l_2000hz = model->L*2000.0/(c2const->Fs/2);
+	int l_4000hz = model->L*4000.0/(c2const->Fs/2);
+	elow = ehigh = 1E-4;
+	for(l=1; l<=l_2000hz; l++)
+	{
+		elow += model->A[l]*model->A[l];
+	}
+	for(l=l_2000hz; l<=l_4000hz; l++)
+	{
+		ehigh += model->A[l]*model->A[l];
+	}
+	eratio = 10.0*log10f(elow/ehigh);
+
+	/* Look for Type 1 errors, strongly V speech that has been
+	   accidentally declared UV */
+
+	if (model->voiced == 0)
+		if (eratio > 10.0)
+			model->voiced = 1;
+
+	/* Look for Type 2 errors, strongly UV speech that has been
+	   accidentally declared V */
+
+	if (model->voiced == 1)
+	{
+		if (eratio < -10.0)
+			model->voiced = 0;
+
+		/* A common source of Type 2 errors is the pitch estimator
+		   gives a low (50Hz) estimate for UV speech, which gives a
+		   good match with noise due to the close harmoonic spacing.
+		   These errors are much more common than people with 50Hz3
+		   pitch, so we have just a small eratio threshold. */
+
+		sixty = 60.0*TWO_PI/c2const->Fs;
+		if ((eratio < -4.0) && (model->Wo <= sixty))
+			model->voiced = 0;
+	}
+	//printf(" v: %d snr: %f eratio: %3.2f %f\n",model->voiced,snr,eratio,dF0);
+
+	return snr;
+}
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: make_synthesis_window
+  AUTHOR......: David Rowe
+  DATE CREATED: 11/5/94
+
+  Init function that generates the trapezoidal (Parzen) sythesis window.
+
+\*---------------------------------------------------------------------------*/
+
+void CCodec2::make_synthesis_window(C2CONST *c2const, float Pn[])
+{
+	int   i;
+	float win;
+	int   n_samp = c2const->n_samp;
+	int   tw     = c2const->tw;
+
+	/* Generate Parzen window in time domain */
+
+	win = 0.0;
+	for(i=0; i<n_samp/2-tw; i++)
+		Pn[i] = 0.0;
+	win = 0.0;
+	for(i=n_samp/2-tw; i<n_samp/2+tw; win+=1.0/(2*tw), i++ )
+		Pn[i] = win;
+	for(i=n_samp/2+tw; i<3*n_samp/2-tw; i++)
+		Pn[i] = 1.0;
+	win = 1.0;
+	for(i=3*n_samp/2-tw; i<3*n_samp/2+tw; win-=1.0/(2*tw), i++)
+		Pn[i] = win;
+	for(i=3*n_samp/2+tw; i<2*n_samp; i++)
+		Pn[i] = 0.0;
+}
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: synthesise
+  AUTHOR......: David Rowe
+  DATE CREATED: 20/2/95
+
+  Synthesise a speech signal in the frequency domain from the
+  sinusodal model parameters.  Uses overlap-add with a trapezoidal
+  window to smoothly interpolate betwen frames.
+
+\*---------------------------------------------------------------------------*/
+
+void CCodec2::synthesise(
+	int    n_samp,
+	FFTR_STATE *fftr_inv_cfg,
+	float  Sn_[],		/* time domain synthesised signal              */
+	MODEL *model,		/* ptr to model parameters for this frame      */
+	float  Pn[],		/* time domain Parzen window                   */
+	int    shift          /* flag used to handle transition frames       */
+)
+{
+	int   i,l,j,b;	        /* loop variables */
+	std::complex<float>  Sw_[FFT_DEC/2+1];	/* DFT of synthesised signal */
+	float sw_[FFT_DEC];	        /* synthesised signal */
+
+	if (shift)
+	{
+		/* Update memories */
+		for(i=0; i<n_samp-1; i++)
+		{
+			Sn_[i] = Sn_[i+n_samp];
+		}
+		Sn_[n_samp-1] = 0.0;
+	}
+
+	for(i=0; i<FFT_DEC/2+1; i++)
+	{
+		Sw_[i].real(0);
+		Sw_[i].imag(0);
+	}
+
+	/* Now set up frequency domain synthesised speech */
+
+	for(l=1; l<=model->L; l++)
+	{
+		b = (int)(l*model->Wo*FFT_DEC/TWO_PI + 0.5);
+		if (b > ((FFT_DEC/2)-1))
+		{
+			b = (FFT_DEC/2)-1;
+		}
+		Sw_[b] = std::polar(model->A[l], model->phi[l]);
+	}
+
+	/* Perform inverse DFT */
+
+	kiss.fftri(*fftr_inv_cfg, Sw_,sw_);
+
+	/* Overlap add to previous samples */
+
+	for(i=0; i<n_samp-1; i++)
+	{
+		Sn_[i] += sw_[FFT_DEC-n_samp+1+i]*Pn[i];
+	}
+
+	if (shift)
+		for(i=n_samp-1,j=0; i<2*n_samp; i++,j++)
+			Sn_[i] = sw_[j]*Pn[i];
+	else
+		for(i=n_samp-1,j=0; i<2*n_samp; i++,j++)
+			Sn_[i] += sw_[j]*Pn[i];
+}
+
+int CCodec2::codec2_rand(void)
+{
+	static unsigned long next = 1;
+	next = next * 1103515245 + 12345;
+	return((unsigned)(next/65536) % 32768);
+}
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: interp_Wo()
+  AUTHOR......: David Rowe
+  DATE CREATED: 22 May 2012
+
+  Interpolates centre 10ms sample of Wo and L samples given two
+  samples 20ms apart. Assumes voicing is available for centre
+  (interpolated) frame.
+
+\*---------------------------------------------------------------------------*/
+
+void CCodec2::interp_Wo(
+	MODEL *interp,    /* interpolated model params                     */
+	MODEL *prev,      /* previous frames model params                  */
+	MODEL *next,      /* next frames model params                      */
+	float  Wo_min
+)
+{
+	interp_Wo2(interp, prev, next, 0.5, Wo_min);
+}
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: interp_Wo2()
+  AUTHOR......: David Rowe
+  DATE CREATED: 22 May 2012
+
+  Weighted interpolation of two Wo samples.
+
+\*---------------------------------------------------------------------------*/
+
+void CCodec2::interp_Wo2(
+	MODEL *interp,    /* interpolated model params                     */
+	MODEL *prev,      /* previous frames model params                  */
+	MODEL *next,      /* next frames model params                      */
+	float  weight,
+	float  Wo_min
+)
+{
+	/* trap corner case where voicing est is probably wrong */
+
+	if (interp->voiced && !prev->voiced && !next->voiced)
+	{
+		interp->voiced = 0;
+	}
+
+	/* Wo depends on voicing of this and adjacent frames */
+
+	if (interp->voiced)
+	{
+		if (prev->voiced && next->voiced)
+			interp->Wo = (1.0 - weight)*prev->Wo + weight*next->Wo;
+		if (!prev->voiced && next->voiced)
+			interp->Wo = next->Wo;
+		if (prev->voiced && !next->voiced)
+			interp->Wo = prev->Wo;
+	}
+	else
+	{
+		interp->Wo = Wo_min;
+	}
+	interp->L = PI/interp->Wo;
+}
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: interp_energy()
+  AUTHOR......: David Rowe
+  DATE CREATED: 22 May 2012
+
+  Interpolates centre 10ms sample of energy given two samples 20ms
+  apart.
+
+\*---------------------------------------------------------------------------*/
+
+float CCodec2::interp_energy(float prev_e, float next_e)
+{
+	//return powf(10.0, (log10f(prev_e) + log10f(next_e))/2.0);
+	return sqrtf(prev_e * next_e); //looks better is math. identical and faster math
+}
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: interpolate_lsp_ver2()
+  AUTHOR......: David Rowe
+  DATE CREATED: 22 May 2012
+
+  Weighted interpolation of LSPs.
+
+\*---------------------------------------------------------------------------*/
+
+void CCodec2::interpolate_lsp_ver2(float interp[], float prev[],  float next[], float weight, int order)
+{
+	int i;
+
+	for(i=0; i<order; i++)
+		interp[i] = (1.0 - weight)*prev[i] + weight*next[i];
+}
+
+/*---------------------------------------------------------------------------*\
+
+  Introduction to Line Spectrum Pairs (LSPs)
+  ------------------------------------------
+
+  LSPs are used to encode the LPC filter coefficients {ak} for
+  transmission over the channel.  LSPs have several properties (like
+  less sensitivity to quantisation noise) that make them superior to
+  direct quantisation of {ak}.
+
+  A(z) is a polynomial of order lpcrdr with {ak} as the coefficients.
+
+  A(z) is transformed to P(z) and Q(z) (using a substitution and some
+  algebra), to obtain something like:
+
+    A(z) = 0.5[P(z)(z+z^-1) + Q(z)(z-z^-1)]  (1)
+
+  As you can imagine A(z) has complex zeros all over the z-plane. P(z)
+  and Q(z) have the very neat property of only having zeros _on_ the
+  unit circle.  So to find them we take a test point z=exp(jw) and
+  evaluate P (exp(jw)) and Q(exp(jw)) using a grid of points between 0
+  and pi.
+
+  The zeros (roots) of P(z) also happen to alternate, which is why we
+  swap coefficients as we find roots.  So the process of finding the
+  LSP frequencies is basically finding the roots of 5th order
+  polynomials.
+
+  The root so P(z) and Q(z) occur in symmetrical pairs at +/-w, hence
+  the name Line Spectrum Pairs (LSPs).
+
+  To convert back to ak we just evaluate (1), "clocking" an impulse
+  thru it lpcrdr times gives us the impulse response of A(z) which is
+  {ak}.
+
+\*---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: lsp_to_lpc()
+  AUTHOR......: David Rowe
+  DATE CREATED: 24/2/93
+
+  This function converts LSP coefficients to LPC coefficients.  In the
+  Speex code we worked out a way to simplify this significantly.
+
+\*---------------------------------------------------------------------------*/
+
+void CCodec2::lsp_to_lpc(float *lsp, float *ak, int order)
+/*  float *freq         array of LSP frequencies in radians     	*/
+/*  float *ak 		array of LPC coefficients 			*/
+/*  int order     	order of LPC coefficients 			*/
+
+
+{
+	int i,j;
+	float xout1,xout2,xin1,xin2;
+	float *pw,*n1,*n2,*n3,*n4 = 0;
+	float freq[order];
+	float Wp[(order * 4) + 2];
+
+	/* convert from radians to the x=cos(w) domain */
+
+	for(i=0; i<order; i++)
+		freq[i] = cosf(lsp[i]);
+
+	pw = Wp;
+
+	/* initialise contents of array */
+
+	for(i=0; i<=4*(order/2)+1; i++)        	/* set contents of buffer to 0 */
+	{
+		*pw++ = 0.0;
+	}
+
+	/* Set pointers up */
+
+	pw = Wp;
+	xin1 = 1.0;
+	xin2 = 1.0;
+
+	/* reconstruct P(z) and Q(z) by cascading second order polynomials
+	  in form 1 - 2xz(-1) +z(-2), where x is the LSP coefficient */
+
+	for(j=0; j<=order; j++)
+	{
+		for(i=0; i<(order/2); i++)
+		{
+			n1 = pw+(i*4);
+			n2 = n1 + 1;
+			n3 = n2 + 1;
+			n4 = n3 + 1;
+			xout1 = xin1 - 2*(freq[2*i]) * *n1 + *n2;
+			xout2 = xin2 - 2*(freq[2*i+1]) * *n3 + *n4;
+			*n2 = *n1;
+			*n4 = *n3;
+			*n1 = xin1;
+			*n3 = xin2;
+			xin1 = xout1;
+			xin2 = xout2;
+		}
+		xout1 = xin1 + *(n4+1);
+		xout2 = xin2 - *(n4+2);
+		ak[j] = (xout1 + xout2)*0.5;
+		*(n4+1) = xin1;
+		*(n4+2) = xin2;
+
+		xin1 = 0.0;
+		xin2 = 0.0;
+	}
+}
diff --git a/codec2/codec2.h b/codec2/codec2.h
new file mode 100644
index 0000000..a74b9d4
--- /dev/null
+++ b/codec2/codec2.h
@@ -0,0 +1,98 @@
+/*---------------------------------------------------------------------------*\
+
+  FILE........: codec2.h
+  AUTHOR......: David Rowe
+  DATE CREATED: 21 August 2010
+
+  Codec 2 fully quantised encoder and decoder functions.  If you want use
+  Codec 2, these are the functions you need to call.
+
+\*---------------------------------------------------------------------------*/
+
+/*
+  Copyright (C) 2010 David Rowe
+
+  All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License version 2.1, as
+  published by the Free Software Foundation.  This program is
+  distributed in the hope that it will be useful, but WITHOUT ANY
+  WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+  License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CODEC2__
+#define  __CODEC2__
+
+#include <complex>
+
+#include "codec2_internal.h"
+#include "defines.h"
+#include "kiss_fft.h"
+#include "nlp.h"
+#include "quantise.h"
+
+#define CODEC2_MODE_3200 	0
+#define CODEC2_MODE_1600 	2
+
+#ifndef CODEC2_MODE_EN_DEFAULT
+#define CODEC2_MODE_EN_DEFAULT 1
+#endif
+
+#define CODEC2_RAND_MAX 32767
+
+class CCodec2
+{
+public:
+	CCodec2(bool is_3200);
+	~CCodec2();
+	void codec2_encode(unsigned char *bits, const short *speech_in);
+	void codec2_decode(short *speech_out, const unsigned char *bits);
+	int  codec2_samples_per_frame();
+	int  codec2_bits_per_frame();
+
+private:
+	// merged from other files
+	void sample_phase(MODEL *model, std::complex<float> filter_phase[], std::complex<float> A[]);
+	void phase_synth_zero_order(int n_samp, MODEL *model, float *ex_phase, std::complex<float> filter_phase[]);
+	void postfilter(MODEL *model, float *bg_est);
+
+	C2CONST c2const_create(int Fs, float framelength_ms);
+
+	void make_analysis_window(C2CONST *c2const, FFT_STATE *fft_fwd_cfg, float w[], float W[]);
+	void dft_speech(C2CONST *c2const, FFT_STATE &fft_fwd_cfg, std::complex<float> Sw[], float Sn[], float w[]);
+	void two_stage_pitch_refinement(C2CONST *c2const, MODEL *model, std::complex<float> Sw[]);
+	void estimate_amplitudes(MODEL *model, std::complex<float> Sw[], int est_phase);
+	float est_voicing_mbe(C2CONST *c2const, MODEL *model, std::complex<float> Sw[], float W[]);
+	void make_synthesis_window(C2CONST *c2const, float Pn[]);
+	void synthesise(int n_samp, FFTR_STATE *fftr_inv_cfg, float Sn_[], MODEL *model, float Pn[], int shift);
+	int codec2_rand(void);
+	void hs_pitch_refinement(MODEL *model, std::complex<float> Sw[], float pmin, float pmax, float pstep);
+
+	void interp_Wo(MODEL *interp, MODEL *prev, MODEL *next, float Wo_min);
+	void interp_Wo2(MODEL *interp, MODEL *prev, MODEL *next, float weight, float Wo_min);
+	float interp_energy(float prev, float next);
+	void interpolate_lsp_ver2(float interp[], float prev[],  float next[], float weight, int order);
+
+	void analyse_one_frame(MODEL *model, const short *speech);
+	void synthesise_one_frame(short speech[], MODEL *model, std::complex<float> Aw[], float gain);
+	void codec2_encode_3200(unsigned char *bits, const short *speech);
+	void codec2_encode_1600(unsigned char *bits, const short *speech);
+	void codec2_decode_3200(short *speech, const unsigned char *bits);
+	void codec2_decode_1600(short *speech, const unsigned char *bits);
+	void ear_protection(float in_out[], int n);
+	void lsp_to_lpc(float *freq, float *ak, int lpcrdr);
+
+	void (CCodec2::*encode)(unsigned char *bits, const short *speech);
+	void (CCodec2::*decode)(short *speech, const unsigned char *bits);
+	Cnlp nlp;
+	CQuantize qt;
+	CODEC2 c2;
+};
+
+#endif
diff --git a/codec2/codec2_internal.h b/codec2/codec2_internal.h
new file mode 100644
index 0000000..1b037e1
--- /dev/null
+++ b/codec2/codec2_internal.h
@@ -0,0 +1,67 @@
+/*---------------------------------------------------------------------------*\
+
+  FILE........: codec2_internal.h
+  AUTHOR......: David Rowe
+  DATE CREATED: April 16 2012
+
+  Header file for Codec2 internal states, exposed via this header
+  file to assist in testing.
+
+\*---------------------------------------------------------------------------*/
+
+/*
+  Copyright (C) 2012 David Rowe
+
+  All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License version 2.1, as
+  published by the Free Software Foundation.  This program is
+  distributed in the hope that it will be useful, but WITHOUT ANY
+  WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+  License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CODEC2_INTERNAL__
+#define __CODEC2_INTERNAL__
+
+#include "kiss_fft.h"
+
+using CODEC2 = struct codec2_tag {
+	int                mode;
+	int                Fs;
+	int                n_samp;
+	int                m_pitch;
+	int                gray;                     /* non-zero for gray encoding                */
+	int                lpc_pf;                   /* LPC post filter on                        */
+	int                bass_boost;               /* LPC post filter bass boost                */
+	int                smoothing;                /* enable smoothing for channels with errors */
+	float              ex_phase;                 /* excitation model phase track              */
+	float              bg_est;                   /* background noise estimate for post filter */
+	float              prev_f0_enc;              /* previous frame's f0    estimate           */
+	float              prev_e_dec;               /* previous frame's LPC energy               */
+	float              beta;                     /* LPC post filter parameters                */
+	float              gamma;
+	float              xq_enc[2];                /* joint pitch and energy VQ states          */
+	float              xq_dec[2];
+	float              W[FFT_ENC];	             /* DFT of w[]                                */
+	float              hpf_states[2];            /* high pass filter states                   */
+	float              prev_lsps_dec[LPC_ORD];   /* previous frame's LSPs                     */
+	float             *softdec;                  /* optional soft decn bits from demod        */
+	MODEL              prev_model_dec;           /* previous frame's model parameters         */
+	C2CONST            c2const;
+	FFT_STATE          fft_fwd_cfg;              /* forward FFT config                        */
+	FFTR_STATE         fftr_fwd_cfg;             /* forward real FFT config                   */
+	FFTR_STATE         fftr_inv_cfg;             /* inverse FFT config                        */
+	std::vector<float> w;	                     /* [m_pitch] time domain hamming window      */
+	std::vector<float> Pn;	                     /* [2*n_samp] trapezoidal synthesis window   */
+	std::vector<float> Sn;                       /* [m_pitch] input speech                    */
+	std::vector<float> Sn_;	                     /* [2*n_samp] synthesised output speech      */
+	std::vector<float> bpf_buf;                  /* buffer for band pass filter               */
+};
+
+#endif
diff --git a/codec2/defines.h b/codec2/defines.h
new file mode 100644
index 0000000..f201313
--- /dev/null
+++ b/codec2/defines.h
@@ -0,0 +1,127 @@
+/*---------------------------------------------------------------------------*\
+
+  FILE........: defines.h
+  AUTHOR......: David Rowe
+  DATE CREATED: 23/4/93
+
+  Defines and structures used throughout the codec.
+
+\*---------------------------------------------------------------------------*/
+
+/*
+  Copyright (C) 2009 David Rowe
+
+  All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License version 2.1, as
+  published by the Free Software Foundation.  This program is
+  distributed in the hope that it will be useful, but WITHOUT ANY
+  WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+  License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __DEFINES__
+#define __DEFINES__
+
+#include <complex>
+#include <vector>
+
+/*---------------------------------------------------------------------------*\
+
+				DEFINES
+
+\*---------------------------------------------------------------------------*/
+
+/* General defines */
+
+#define N_S        0.01         /* internal proc frame length in secs   */
+#define TW_S       0.005        /* trapezoidal synth window overlap     */
+#define MAX_AMP    160			/* maximum number of harmonics          */
+#ifndef PI
+#define PI         3.141592654	/* mathematical constant                */
+#endif
+#define TWO_PI     6.283185307	/* mathematical constant                */
+#define MAX_STR    2048         /* maximum string size                  */
+
+#define FFT_ENC    512			/* size of FFT used for encoder         */
+#define FFT_DEC    512	    	/* size of FFT used in decoder          */
+#define V_THRESH   6.0          /* voicing threshold in dB              */
+#define LPC_ORD    10			/* LPC order                            */
+#define LPC_ORD_LOW 6			/* LPC order for lower rates            */
+
+/* Pitch estimation defines */
+
+#define M_PITCH_S  0.0400       /* pitch analysis window in s           */
+#define P_MIN_S    0.0025		/* minimum pitch period in s            */
+#define P_MAX_S    0.0200		/* maximum pitch period in s            */
+#define MAXFACTORS 32			// e.g. an fft of length 128 has 4 factors
+ 								// as far as kissfft is concerned 4*4*4*2
+
+/*---------------------------------------------------------------------------*\
+
+				TYPEDEFS
+
+\*---------------------------------------------------------------------------*/
+
+/* Structure to hold constants calculated at run time based on sample rate */
+
+using C2CONST = struct c2const_tag
+{
+    int   Fs;            /* sample rate of this instance             */
+    int   n_samp;        /* number of samples per 10ms frame at Fs   */
+    int   max_amp;       /* maximum number of harmonics              */
+    int   m_pitch;       /* pitch estimation window size in samples  */
+    int   p_min;         /* minimum pitch period in samples          */
+    int   p_max;         /* maximum pitch period in samples          */
+    float Wo_min;
+    float Wo_max;
+    int   nw;            /* analysis window size in samples          */
+    int   tw;            /* trapezoidal synthesis window overlap     */
+};
+
+/* Structure to hold model parameters for one frame */
+
+using MODEL = struct model_tag
+{
+    float Wo;		  /* fundamental frequency estimate in radians  */
+    int   L;		  /* number of harmonics                        */
+    float A[MAX_AMP+1];	  /* amplitiude of each harmonic                */
+    float phi[MAX_AMP+1]; /* phase of each harmonic                     */
+    int   voiced;	  /* non-zero if this frame is voiced           */
+};
+
+/* describes each codebook  */
+
+struct lsp_codebook
+{
+	int     k; /* dimension of vector  */
+	int log2m; /* number of bits in m  */
+	int     m; /* elements in codebook */
+	float *cb; /* The elements         */
+};
+
+using FFT_STATE = struct fft_state_tag
+{
+    int  nfft;
+    bool inverse;
+    int  factors[2*MAXFACTORS];
+    std::vector<std::complex<float>> twiddles;
+};
+
+using FFTR_STATE = struct fftr_state_tag
+{
+	FFT_STATE substate;
+	std::vector<std::complex<float>> tmpbuf;
+	std::vector<std::complex<float>> super_twiddles;
+};
+
+extern const struct lsp_codebook lsp_cb[];
+extern const struct lsp_codebook lsp_cbd[];
+extern const struct lsp_codebook ge_cb[];
+
+#endif
diff --git a/codec2/kiss_fft.cpp b/codec2/kiss_fft.cpp
new file mode 100644
index 0000000..cefb846
--- /dev/null
+++ b/codec2/kiss_fft.cpp
@@ -0,0 +1,435 @@
+/*
+Copyright (c) 2003-2010, Mark Borgerding
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+    * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <cstring>
+#include <cassert>
+
+#include "defines.h"
+#include "kiss_fft.h"
+
+void CKissFFT::kf_bfly2(std::complex<float> *Fout, const size_t fstride, FFT_STATE &st, int m)
+{
+	std::complex<float> *Fout2;
+	std::complex<float> *tw1 = st.twiddles.data();
+	std::complex<float> t;
+	Fout2 = Fout + m;
+	do
+	{
+		t = *Fout2 * *tw1;
+		tw1 += fstride;
+		*Fout2 = *Fout - t;
+		*Fout += t;
+		++Fout2;
+		++Fout;
+	}
+	while (--m);
+}
+
+void CKissFFT::kf_bfly3(std::complex<float> * Fout, const size_t fstride, FFT_STATE &st, int m)
+{
+	const size_t m2 = 2 * m;
+	std::complex<float> *tw1,*tw2;
+	std::complex<float> scratch[5];
+	std::complex<float> epi3;
+	epi3 = st.twiddles[fstride*m];
+
+	tw1 = tw2 = st.twiddles.data();
+
+	do
+	{
+		scratch[1] = Fout[m] * *tw1;
+		scratch[2] = Fout[m2] * *tw2;
+
+		scratch[3] = scratch[1] + scratch[2];
+		scratch[0] = scratch[1] - scratch[2];
+		tw1 += fstride;
+		tw2 += fstride*2;
+
+		Fout[m] = *Fout - (0.5f * scratch[3]);
+
+		scratch[0] *= epi3.imag();
+
+		*Fout += scratch[3];
+
+		Fout[m2].real(Fout[m].real() + scratch[0].imag());
+		Fout[m2].imag(Fout[m].imag() - scratch[0].real());
+
+		Fout[m].real(Fout[m].real() - scratch[0].imag());
+		Fout[m].imag(Fout[m].imag() + scratch[0].real());
+
+		++Fout;
+	}
+	while(--m);
+}
+
+void CKissFFT::kf_bfly4(std::complex<float> *Fout, const size_t fstride, FFT_STATE &st, int m)
+{
+	std::complex<float> *tw1,*tw2,*tw3;
+	std::complex<float> scratch[6];
+	int k = m;
+	const int m2 = 2 * m;
+	const int m3 = 3 * m;
+
+
+	tw3 = tw2 = tw1 = st.twiddles.data();
+
+	do
+	{
+		scratch[0] = Fout[m] * *tw1;
+		scratch[1] = Fout[m2] * *tw2;
+		scratch[2] = Fout[m3] * *tw3;
+
+		scratch[5] = *Fout - scratch[1];
+		*Fout += scratch[1];
+		scratch[3] = scratch[0] + scratch[2];
+		scratch[4] = scratch[0] - scratch[2];
+		Fout[m2] = *Fout - scratch[3];
+		tw1 += fstride;
+		tw2 += fstride*2;
+		tw3 += fstride*3;
+		*Fout += scratch[3];
+
+		if(st.inverse)
+		{
+			Fout[m].real(scratch[5].real() - scratch[4].imag());
+			Fout[m].imag(scratch[5].imag() + scratch[4].real());
+			Fout[m3].real(scratch[5].real() + scratch[4].imag());
+			Fout[m3].imag(scratch[5].imag() - scratch[4].real());
+		}
+		else
+		{
+			Fout[m].real(scratch[5].real() + scratch[4].imag());
+			Fout[m].imag(scratch[5].imag() - scratch[4].real());
+			Fout[m3].real(scratch[5].real() - scratch[4].imag());
+			Fout[m3].imag(scratch[5].imag() + scratch[4].real());
+		}
+		++Fout;
+	}
+	while(--k);
+}
+
+void CKissFFT::kf_bfly5(std::complex<float> * Fout, const size_t fstride, FFT_STATE &st, int m)
+{
+	std::complex<float> scratch[13];
+	std::complex<float> *twiddles = st.twiddles.data();
+	auto ya = twiddles[fstride*m];
+	auto yb = twiddles[fstride*2*m];
+
+	auto Fout0 = Fout;
+	auto Fout1 = Fout0 + m;
+	auto Fout2 = Fout0 + 2 * m;
+	auto Fout3 = Fout0 + 3 * m;
+	auto Fout4 = Fout0 + 4 * m;
+
+	auto tw = st.twiddles.data();
+	for (int u=0; u<m; ++u)
+	{
+		scratch[0] = *Fout0;
+
+		scratch[1] = *Fout1 *  tw[u*fstride];
+		scratch[2] = *Fout2 *  tw[2*u*fstride];
+		scratch[3] = *Fout3 *  tw[3*u*fstride];
+		scratch[4] = *Fout4 *  tw[4*u*fstride];
+
+		 scratch[7] = scratch[1] + scratch[4];
+		 scratch[10] = scratch[1] - scratch[4];
+		 scratch[8] = scratch[2] + scratch[3];
+		 scratch[9] = scratch[2] - scratch[3];
+
+		*Fout0 += scratch[7] + scratch[8];
+
+		scratch[5] = scratch[0] + (scratch[7] * ya.real()) + (scratch[8] * yb.real());
+
+		scratch[6].real( (scratch[10].imag() * ya.imag()) + (scratch[9].imag() * yb.imag()));
+		scratch[6].imag(-(scratch[10].real() * ya.imag()) - (scratch[9].real() * yb.imag()));
+
+		*Fout1 = scratch[5] - scratch[6];
+		*Fout4 = scratch[5] + scratch[6];
+
+		scratch[11] = scratch[0] + (scratch[7] * yb.real()) + (scratch[8] * ya.real());
+		scratch[12].real(-(scratch[10].imag() * yb.imag()) + (scratch[9].imag() * ya.imag()));
+		scratch[12].imag( (scratch[10].real() * yb.imag()) - (scratch[9].real() * ya.imag()));
+
+		*Fout2 = scratch[11] + scratch[12];
+		*Fout3 = scratch[11] - scratch[12];
+
+		++Fout0;
+		++Fout1;
+		++Fout2;
+		++Fout3;
+		++Fout4;
+	}
+}
+
+/* perform the butterfly for one stage of a mixed radix FFT */
+void CKissFFT::kf_bfly_generic(std::complex<float> *Fout, const size_t fstride, FFT_STATE &st, int m, int p)
+{
+	auto twiddles = st.twiddles.data();
+	std::complex<float> t;
+	int Norig = st.nfft;
+
+	std::vector<std::complex<float>> scratch(p);
+
+	for (int u=0; u<m; ++u)
+	{
+		int k = u;
+		for (int q1=0 ; q1<p ; ++q1)
+		{
+			scratch[q1] = Fout[k];
+			k += m;
+		}
+
+		k = u;
+		for (int q1=0 ; q1<p ; ++q1)
+		{
+			int twidx = 0;
+			Fout[k] = scratch[0];
+			for (int q=1; q<p; ++q)
+			{
+				twidx += fstride * k;
+				if (twidx >= Norig) twidx-=Norig;
+				t = scratch[q] * twiddles[twidx];
+				Fout[k] += t;
+			}
+			k += m;
+		}
+	}
+	scratch.clear();
+}
+
+void CKissFFT::kf_work(std::complex<float> *Fout, const std::complex<float> *f, const size_t fstride, int in_stride, int *factors, FFT_STATE &st)
+{
+	auto Fout_beg = Fout;
+	const int p = *factors++; /* the radix  */
+	const int m = *factors++; /* stage's fft length/p */
+	const std::complex<float> *Fout_end = Fout + p*m;
+
+	if (m==1)
+	{
+		do
+		{
+			*Fout = *f;
+			f += fstride*in_stride;
+		}
+		while( ++Fout != Fout_end );
+	}
+	else
+	{
+		do
+		{
+			// recursive call:
+			// DFT of size m*p performed by doing
+			// p instances of smaller DFTs of size m,
+			// each one takes a decimated version of the input
+			kf_work( Fout, f, fstride*p, in_stride, factors, st);
+			f += fstride*in_stride;
+		}
+		while( (Fout += m) != Fout_end );
+	}
+
+	Fout=Fout_beg;
+
+	// recombine the p smaller DFTs
+	switch (p)
+	{
+	case 2:
+		kf_bfly2(Fout,fstride,st,m);
+		break;
+	case 3:
+		kf_bfly3(Fout,fstride,st,m);
+		break;
+	case 4:
+		kf_bfly4(Fout,fstride,st,m);
+		break;
+	case 5:
+		kf_bfly5(Fout,fstride,st,m);
+		break;
+	default:
+		kf_bfly_generic(Fout,fstride,st,m,p);
+		break;
+	}
+}
+
+/*  facbuf is populated by p1,m1,p2,m2, ...
+    where
+    p[i] * m[i] = m[i-1]
+    m0 = n                  */
+void CKissFFT::kf_factor(int n,int * facbuf)
+{
+	int p=4;
+	double floor_sqrt;
+	floor_sqrt = floorf( sqrtf((double)n) );
+
+	/*factor out powers of 4, powers of 2, then any remaining primes */
+	do
+	{
+		while (n % p)
+		{
+			switch (p)
+			{
+			case 4:
+				p = 2;
+				break;
+			case 2:
+				p = 3;
+				break;
+			default:
+				p += 2;
+				break;
+			}
+			if (p > floor_sqrt)
+				p = n;          /* no more factors, skip to end */
+		}
+		n /= p;
+		*facbuf++ = p;
+		*facbuf++ = n;
+	}
+	while (n > 1);
+}
+
+void CKissFFT::fft_alloc(FFT_STATE &state, const int nfft, bool inverse_fft)
+{
+	state.twiddles.resize(nfft);
+
+	state.nfft = nfft;
+	state.inverse = inverse_fft;
+
+	for (int i=0; i<nfft; ++i)
+	{
+		const double pi=3.141592653589793238462643383279502884197169399375105820974944;
+		double phase = -2.0 * pi * i / nfft;
+		if (state.inverse)
+			phase *= -1.0;
+		state.twiddles[i] = std::polar(1.0f, float(phase));
+	}
+
+	kf_factor(nfft, state.factors);
+}
+
+
+void CKissFFT::fft_stride(FFT_STATE &st, const std::complex<float> *fin, std::complex<float> *fout, int in_stride)
+{
+	if (fin == fout)
+	{
+		//NOTE: this is not really an in-place FFT algorithm.
+		//It just performs an out-of-place FFT into a temp buffer
+		std::vector<std::complex<float>> tmpbuf(st.nfft);
+		kf_work(tmpbuf.data(), fin, true, in_stride, st.factors, st);
+		memcpy(fout, tmpbuf.data(), sizeof(std::complex<float>)*st.nfft);
+		tmpbuf.clear();
+	}
+	else
+	{
+		kf_work(fout, fin, 1, in_stride, st.factors, st);
+	}
+}
+
+void CKissFFT::fft(FFT_STATE &cfg, const std::complex<float> *fin, std::complex<float> *fout)
+{
+	fft_stride(cfg, fin, fout, 1);
+}
+
+int CKissFFT::fft_next_fast_size(int n)
+{
+	while(1)
+	{
+		int m = n;
+		while ( (m % 2) == 0 ) m /= 2;
+		while ( (m % 3) == 0 ) m /= 3;
+		while ( (m % 5) == 0 ) m /= 5;
+		if (m <= 1)
+			break; /* n is completely factorable by twos, threes, and fives */
+		n++;
+	}
+	return n;
+}
+
+void CKissFFT::fftr_alloc(FFTR_STATE &st, int nfft, const bool inverse_fft)
+{
+	nfft >>= 1;
+
+	fft_alloc(st.substate, nfft, inverse_fft);
+	st.tmpbuf.resize(nfft);
+	st.super_twiddles.resize(nfft);
+
+	for (int i=0; i<nfft/2; ++i)
+	{
+		double phase = -3.141592653589793238462643383279502884197169399375105820974944 * (double(i+1) / nfft + .5);
+		if (inverse_fft)
+			phase *= -1.0;
+		st.super_twiddles[i] = std::polar(1.0f, float(phase));
+	}
+}
+
+void CKissFFT::fftr(FFTR_STATE &st, const float *timedata, std::complex<float> *freqdata)
+{
+	assert(st.substate.inverse == false);
+
+	auto ncfft = st.substate.nfft;
+
+	/*perform the parallel fft of two real signals packed in real,imag*/
+	fft( st.substate, (const std::complex<float>*)timedata, st.tmpbuf.data());
+	/* The real part of the DC element of the frequency spectrum in st->tmpbuf
+	 * contains the sum of the even-numbered elements of the input time sequence
+	 * The imag part is the sum of the odd-numbered elements
+	 *
+	 * The sum of tdc.r and tdc.i is the sum of the input time sequence.
+	 *      yielding DC of input time sequence
+	 * The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1...
+	 *      yielding Nyquist bin of input time sequence
+	 */
+
+	auto tdc = st.tmpbuf[0];
+	freqdata[0].real(tdc.real() + tdc.imag());
+	freqdata[ncfft].real(tdc.real() - tdc.imag());
+	freqdata[ncfft].imag(0.f);
+	freqdata[0].imag(0.f);
+
+	for (int  k=1; k <= ncfft/2; ++k)
+	{
+		auto fpk = st.tmpbuf[k];
+		auto fpnk = std::conj(st.tmpbuf[ncfft-k]);
+
+		auto f1k = fpk + fpnk;
+		auto f2k = fpk - fpnk;
+		auto tw = f2k * st.super_twiddles[k-1];
+
+		freqdata[k] = 0.5f * (f1k + tw);
+		freqdata[ncfft-k].real(0.5f * (f1k.real() - tw.real()));
+		freqdata[ncfft-k].imag(0.5f * (tw.imag() - f1k.imag()));
+	}
+}
+
+void CKissFFT::fftri(FFTR_STATE &st, const std::complex<float> *freqdata, float *timedata)
+{
+	assert(st.substate.inverse == true);
+
+	auto ncfft = st.substate.nfft;
+
+	st.tmpbuf[0].real(freqdata[0].real() + freqdata[ncfft].real());
+	st.tmpbuf[0].imag(freqdata[0].real() - freqdata[ncfft].real());
+
+	for (int k=1; k <= ncfft/2; ++k)
+	{
+		auto fk = freqdata[k];
+		auto fnkc = std::conj(freqdata[ncfft - k]);
+
+		auto fek = fk + fnkc;
+		auto tmp = fk - fnkc;
+		auto fok = tmp * st.super_twiddles[k-1];
+		st.tmpbuf[k] = fek + fok;
+		st.tmpbuf[ncfft - k] = std::conj(fek - fok);
+	}
+	fft (st.substate, st.tmpbuf.data(), (std::complex<float> *)timedata);
+}
diff --git a/codec2/kiss_fft.h b/codec2/kiss_fft.h
new file mode 100644
index 0000000..7626617
--- /dev/null
+++ b/codec2/kiss_fft.h
@@ -0,0 +1,35 @@
+#ifndef KISS_FFT_H
+#define KISS_FFT_H
+
+#include <complex>
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <string.h>
+
+#include "defines.h"
+
+/* for real ffts, we need an even size */
+#define kiss_fftr_next_fast_size_real(n) (kiss_fft_next_fast_size( ((n)+1) >> 1) << 1 )
+
+class CKissFFT
+{
+public:
+	void fft_alloc(FFT_STATE &state, const int nfft, const bool inverse_fft);
+	void fft(FFT_STATE &cfg, const std::complex<float> *fin, std::complex<float> *fout);
+	void fft_stride(FFT_STATE &cfg, const std::complex<float> *fin, std::complex<float> *fout, int fin_stride);
+	int fft_next_fast_size(int n);
+	void fftr_alloc(FFTR_STATE &state, int nfft, const bool inverse_fft);
+	void fftr(FFTR_STATE &cfg,const float *timedata,std::complex<float> *freqdata);
+	void fftri(FFTR_STATE &cfg,const std::complex<float> *freqdata,float *timedata);
+private:
+	void kf_bfly2(std::complex<float> *Fout, const size_t fstride, FFT_STATE &st, int m);
+	void kf_bfly3(std::complex<float> *Fout, const size_t fstride, FFT_STATE &st, int m);
+	void kf_bfly4(std::complex<float> *Fout, const size_t fstride, FFT_STATE &st, int m);
+	void kf_bfly5(std::complex<float> *Fout, const size_t fstride, FFT_STATE &st, int m);
+	void kf_bfly_generic(std::complex<float> *Fout, const size_t fstride, FFT_STATE &st, int m, int p);
+	void kf_work(std::complex<float> *Fout, const std::complex<float> *f, const size_t fstride, int in_stride, int *factors, FFT_STATE &st);
+	void kf_factor(int n, int *facbuf);
+};
+#endif
diff --git a/codec2/lpc.cpp b/codec2/lpc.cpp
new file mode 100644
index 0000000..10f2c09
--- /dev/null
+++ b/codec2/lpc.cpp
@@ -0,0 +1,311 @@
+/*---------------------------------------------------------------------------*\
+
+  FILE........: lpc.c
+  AUTHOR......: David Rowe
+  DATE CREATED: 30 Sep 1990 (!)
+
+  Linear Prediction functions written in C.
+
+\*---------------------------------------------------------------------------*/
+
+/*
+  Copyright (C) 2009-2012 David Rowe
+
+  All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License version 2.1, as
+  published by the Free Software Foundation.  This program is
+  distributed in the hope that it will be useful, but WITHOUT ANY
+  WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+  License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define LPC_MAX_N 512		/* maximum no. of samples in frame */
+#define PI 3.141592654		/* mathematical constant */
+
+#define ALPHA 1.0
+#define BETA  0.94
+
+#include <assert.h>
+#include <math.h>
+#include "defines.h"
+#include "lpc.h"
+
+/*---------------------------------------------------------------------------*\
+
+  pre_emp()
+
+  Pre-emphasise (high pass filter with zero close to 0 Hz) a frame of
+  speech samples.  Helps reduce dynamic range of LPC spectrum, giving
+  greater weight and hense a better match to low energy formants.
+
+  Should be balanced by de-emphasis of the output speech.
+
+\*---------------------------------------------------------------------------*/
+
+void Clpc::pre_emp(
+	float  Sn_pre[], /* output frame of speech samples                     */
+	float  Sn[],	   /* input frame of speech samples                      */
+	float *mem,      /* Sn[-1]single sample memory                         */
+	int   Nsam	   /* number of speech samples to use                    */
+)
+{
+	int   i;
+
+	for(i=0; i<Nsam; i++)
+	{
+		Sn_pre[i] = Sn[i] - ALPHA * mem[0];
+		mem[0] = Sn[i];
+	}
+
+}
+
+
+/*---------------------------------------------------------------------------*\
+
+  de_emp()
+
+  De-emphasis filter (low pass filter with a pole close to 0 Hz).
+
+\*---------------------------------------------------------------------------*/
+
+void Clpc::de_emp(
+	float  Sn_de[],  /* output frame of speech samples                     */
+	float  Sn[],	   /* input frame of speech samples                      */
+	float *mem,      /* Sn[-1]single sample memory                         */
+	int    Nsam	   /* number of speech samples to use                    */
+)
+{
+	int   i;
+
+	for(i=0; i<Nsam; i++)
+	{
+		Sn_de[i] = Sn[i] + BETA * mem[0];
+		mem[0] = Sn_de[i];
+	}
+
+}
+
+
+/*---------------------------------------------------------------------------*\
+
+  hanning_window()
+
+  Hanning windows a frame of speech samples.
+
+\*---------------------------------------------------------------------------*/
+
+void Clpc::hanning_window(
+	float Sn[],	/* input frame of speech samples */
+	float Wn[],	/* output frame of windowed samples */
+	int Nsam	/* number of samples */
+)
+{
+	int i;	/* loop variable */
+
+	for(i=0; i<Nsam; i++)
+		Wn[i] = Sn[i]*(0.5 - 0.5*cosf(2*PI*(float)i/(Nsam-1)));
+}
+
+/*---------------------------------------------------------------------------*\
+
+  autocorrelate()
+
+  Finds the first P autocorrelation values of an array of windowed speech
+  samples Sn[].
+
+\*---------------------------------------------------------------------------*/
+
+void Clpc::autocorrelate(
+	float Sn[],	/* frame of Nsam windowed speech samples */
+	float Rn[],	/* array of P+1 autocorrelation coefficients */
+	int Nsam,	/* number of windowed samples to use */
+	int order	/* order of LPC analysis */
+)
+{
+	int i,j;	/* loop variables */
+
+	for(j=0; j<order+1; j++)
+	{
+		Rn[j] = 0.0;
+		for(i=0; i<Nsam-j; i++)
+			Rn[j] += Sn[i]*Sn[i+j];
+	}
+}
+
+/*---------------------------------------------------------------------------*\
+
+  levinson_durbin()
+
+  Given P+1 autocorrelation coefficients, finds P Linear Prediction Coeff.
+  (LPCs) where P is the order of the LPC all-pole model. The Levinson-Durbin
+  algorithm is used, and is described in:
+
+    J. Makhoul
+    "Linear prediction, a tutorial review"
+    Proceedings of the IEEE
+    Vol-63, No. 4, April 1975
+
+\*---------------------------------------------------------------------------*/
+
+void Clpc::levinson_durbin(
+	float R[],		/* order+1 autocorrelation coeff */
+	float lpcs[],		/* order+1 LPC's */
+	int order		/* order of the LPC analysis */
+)
+{
+	float a[order+1][order+1];
+	float sum, e, k;
+	int i,j;				/* loop variables */
+
+	e = R[0];				/* Equation 38a, Makhoul */
+
+	for(i=1; i<=order; i++)
+	{
+		sum = 0.0;
+		for(j=1; j<=i-1; j++)
+			sum += a[i-1][j]*R[i-j];
+		k = -1.0*(R[i] + sum)/e;		/* Equation 38b, Makhoul */
+		if (fabsf(k) > 1.0)
+			k = 0.0;
+
+		a[i][i] = k;
+
+		for(j=1; j<=i-1; j++)
+			a[i][j] = a[i-1][j] + k*a[i-1][i-j];	/* Equation 38c, Makhoul */
+
+		e *= (1-k*k);				/* Equation 38d, Makhoul */
+	}
+
+	for(i=1; i<=order; i++)
+		lpcs[i] = a[order][i];
+	lpcs[0] = 1.0;
+}
+
+/*---------------------------------------------------------------------------*\
+
+  inverse_filter()
+
+  Inverse Filter, A(z).  Produces an array of residual samples from an array
+  of input samples and linear prediction coefficients.
+
+  The filter memory is stored in the first order samples of the input array.
+
+\*---------------------------------------------------------------------------*/
+
+void Clpc::inverse_filter(
+	float Sn[],	/* Nsam input samples */
+	float a[],	/* LPCs for this frame of samples */
+	int Nsam,	/* number of samples */
+	float res[],	/* Nsam residual samples */
+	int order	/* order of LPC */
+)
+{
+	int i,j;	/* loop variables */
+
+	for(i=0; i<Nsam; i++)
+	{
+		res[i] = 0.0;
+		for(j=0; j<=order; j++)
+			res[i] += Sn[i-j]*a[j];
+	}
+}
+
+/*---------------------------------------------------------------------------*\
+
+ synthesis_filter()
+
+ C version of the Speech Synthesis Filter, 1/A(z).  Given an array of
+ residual or excitation samples, and the the LP filter coefficients, this
+ function will produce an array of speech samples.  This filter structure is
+ IIR.
+
+ The synthesis filter has memory as well, this is treated in the same way
+ as the memory for the inverse filter (see inverse_filter() notes above).
+ The difference is that the memory for the synthesis filter is stored in
+ the output array, wheras the memory of the inverse filter is stored in the
+ input array.
+
+ Note: the calling function must update the filter memory.
+
+\*---------------------------------------------------------------------------*/
+
+void Clpc::synthesis_filter(
+	float res[],	/* Nsam input residual (excitation) samples */
+	float a[],	/* LPCs for this frame of speech samples */
+	int Nsam,	/* number of speech samples */
+	int order,	/* LPC order */
+	float Sn_[]	/* Nsam output synthesised speech samples */
+)
+{
+	int i,j;	/* loop variables */
+
+	/* Filter Nsam samples */
+
+	for(i=0; i<Nsam; i++)
+	{
+		Sn_[i] = res[i]*a[0];
+		for(j=1; j<=order; j++)
+			Sn_[i] -= Sn_[i-j]*a[j];
+	}
+}
+
+/*---------------------------------------------------------------------------*\
+
+  find_aks()
+
+  This function takes a frame of samples, and determines the linear
+  prediction coefficients for that frame of samples.
+
+\*---------------------------------------------------------------------------*/
+
+void Clpc::find_aks(
+	float Sn[],	/* Nsam samples with order sample memory */
+	float a[],	/* order+1 LPCs with first coeff 1.0 */
+	int Nsam,	/* number of input speech samples */
+	int order,	/* order of the LPC analysis */
+	float *E	/* residual energy */
+)
+{
+	float Wn[LPC_MAX_N];	/* windowed frame of Nsam speech samples */
+	float R[order+1];	/* order+1 autocorrelation values of Sn[] */
+	int i;
+
+	assert(Nsam < LPC_MAX_N);
+
+	hanning_window(Sn,Wn,Nsam);
+	autocorrelate(Wn,R,Nsam,order);
+	levinson_durbin(R,a,order);
+
+	*E = 0.0;
+	for(i=0; i<=order; i++)
+		*E += a[i]*R[i];
+	if (*E < 0.0)
+		*E = 1E-12;
+}
+
+/*---------------------------------------------------------------------------*\
+
+  weight()
+
+  Weights a vector of LPCs.
+
+\*---------------------------------------------------------------------------*/
+
+void Clpc::weight(
+	float ak[],	/* vector of order+1 LPCs */
+	float gamma,	/* weighting factor */
+	int order,	/* num LPCs (excluding leading 1.0) */
+	float akw[]	/* weighted vector of order+1 LPCs */
+)
+{
+	int i;
+
+	for(i=1; i<=order; i++)
+		akw[i] = ak[i]*powf(gamma,(float)i);
+}
diff --git a/codec2/lpc.h b/codec2/lpc.h
new file mode 100644
index 0000000..a7d29f7
--- /dev/null
+++ b/codec2/lpc.h
@@ -0,0 +1,47 @@
+/*---------------------------------------------------------------------------*\
+
+  FILE........: lpc.h
+  AUTHOR......: David Rowe
+  DATE CREATED: 24/8/09
+
+  Linear Prediction functions written in C.
+
+\*---------------------------------------------------------------------------*/
+
+/*
+  Copyright (C) 2009-2012 David Rowe
+
+  All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License version 2.1, as
+  published by the Free Software Foundation.  This program is
+  distributed in the hope that it will be useful, but WITHOUT ANY
+  WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+  License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __LPC__
+#define __LPC__
+
+#define LPC_MAX_ORDER 20
+
+class Clpc {
+public:
+	void autocorrelate(float Sn[], float Rn[], int Nsam, int order);
+	void levinson_durbin(float R[],	float lpcs[], int order);
+private:
+	void pre_emp(float Sn_pre[], float Sn[], float *mem, int Nsam);
+	void de_emp(float Sn_se[], float Sn[], float *mem, int Nsam);
+	void hanning_window(float Sn[],	float Wn[], int Nsam);
+	void inverse_filter(float Sn[], float a[], int Nsam, float res[], int order);
+	void synthesis_filter(float res[], float a[], int Nsam,	int order, float Sn_[]);
+	void find_aks(float Sn[], float a[], int Nsam, int order, float *E);
+	void weight(float ak[],	float gamma, int order,	float akw[]);
+};
+
+#endif
diff --git a/codec2/nlp.cpp b/codec2/nlp.cpp
new file mode 100644
index 0000000..753e96b
--- /dev/null
+++ b/codec2/nlp.cpp
@@ -0,0 +1,520 @@
+/*---------------------------------------------------------------------------*\
+
+  FILE........: nlp.c
+  AUTHOR......: David Rowe
+  DATE CREATED: 23/3/93
+
+  Non Linear Pitch (NLP) estimation functions.
+
+\*---------------------------------------------------------------------------*/
+
+/*
+  Copyright (C) 2009 David Rowe
+
+  All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License version 2.1, as
+  published by the Free Software Foundation.  This program is
+  distributed in the hope that it will be useful, but WITHOUT ANY
+  WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+  License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <assert.h>
+#include <math.h>
+#include <stdlib.h>
+
+#include "defines.h"
+#include "nlp.h"
+#include "kiss_fft.h"
+
+extern CKissFFT kiss;
+
+/*---------------------------------------------------------------------------*\
+
+ 				GLOBALS
+
+\*---------------------------------------------------------------------------*/
+
+/* 48 tap 600Hz low pass FIR filter coefficients */
+
+static const float nlp_fir[] =
+{
+	-1.0818124e-03,
+	-1.1008344e-03,
+	-9.2768838e-04,
+	-4.2289438e-04,
+	5.5034190e-04,
+	2.0029849e-03,
+	3.7058509e-03,
+	5.1449415e-03,
+	5.5924666e-03,
+	4.3036754e-03,
+	8.0284511e-04,
+	-4.8204610e-03,
+	-1.1705810e-02,
+	-1.8199275e-02,
+	-2.2065282e-02,
+	-2.0920610e-02,
+	-1.2808831e-02,
+	3.2204775e-03,
+	2.6683811e-02,
+	5.5520624e-02,
+	8.6305944e-02,
+	1.1480192e-01,
+	1.3674206e-01,
+	1.4867556e-01,
+	1.4867556e-01,
+	1.3674206e-01,
+	1.1480192e-01,
+	8.6305944e-02,
+	5.5520624e-02,
+	2.6683811e-02,
+	3.2204775e-03,
+	-1.2808831e-02,
+	-2.0920610e-02,
+	-2.2065282e-02,
+	-1.8199275e-02,
+	-1.1705810e-02,
+	-4.8204610e-03,
+	8.0284511e-04,
+	4.3036754e-03,
+	5.5924666e-03,
+	5.1449415e-03,
+	3.7058509e-03,
+	2.0029849e-03,
+	5.5034190e-04,
+	-4.2289438e-04,
+	-9.2768838e-04,
+	-1.1008344e-03,
+	-1.0818124e-03
+};
+
+static const float fdmdv_os_filter[]= {
+    -0.0008215855034550382,
+    -0.0007833023901802921,
+     0.001075563790768233,
+     0.001199092367787555,
+    -0.001765309502928316,
+    -0.002055372115328064,
+     0.002986877604154257,
+     0.003462567920638414,
+    -0.004856570111126334,
+    -0.005563143845031497,
+     0.007533613299748122,
+     0.008563932468880897,
+    -0.01126857129039911,
+    -0.01280782411693687,
+     0.01651443896361847,
+     0.01894875110322284,
+    -0.02421604439474981,
+    -0.02845107338464062,
+     0.03672973563400258,
+     0.04542046150312214,
+    -0.06189165826716491,
+    -0.08721876380763803,
+     0.1496157094199961,
+     0.4497962274137046,
+     0.4497962274137046,
+     0.1496157094199961,
+    -0.08721876380763803,
+    -0.0618916582671649,
+     0.04542046150312216,
+     0.03672973563400257,
+    -0.02845107338464062,
+    -0.02421604439474984,
+     0.01894875110322284,
+     0.01651443896361848,
+    -0.01280782411693687,
+    -0.0112685712903991,
+     0.008563932468880899,
+     0.007533613299748123,
+    -0.005563143845031501,
+    -0.004856570111126346,
+     0.003462567920638419,
+     0.002986877604154259,
+    -0.002055372115328063,
+    -0.001765309502928318,
+     0.001199092367787557,
+     0.001075563790768233,
+    -0.0007833023901802925,
+    -0.0008215855034550383
+};
+
+/*---------------------------------------------------------------------------*\
+
+  nlp_create()
+
+  Initialisation function for NLP pitch estimator.
+
+\*---------------------------------------------------------------------------*/
+
+void Cnlp::nlp_create(C2CONST *c2const)
+{
+	int  i;
+	int  m = c2const->m_pitch;
+	int  Fs = c2const->Fs;
+
+	assert((Fs == 8000) || (Fs == 16000));
+	snlp.Fs = Fs;
+
+	snlp.m = m;
+
+	/* if running at 16kHz allocate storage for decimating filter memory */
+
+	if (Fs == 16000)
+	{
+		snlp.Sn16k.resize(FDMDV_OS_TAPS_16K + c2const->n_samp);
+		for(i=0; i<FDMDV_OS_TAPS_16K; i++)
+		{
+			snlp.Sn16k[i] = 0.0;
+		}
+
+		/* most processing occurs at 8 kHz sample rate so halve m */
+
+		m /= 2;
+	}
+
+	assert(m <= PMAX_M);
+
+	for(i=0; i<m/DEC; i++)
+	{
+		snlp.w[i] = 0.5 - 0.5*cosf(2*PI*i/(m/DEC-1));
+	}
+
+	for(i=0; i<PMAX_M; i++)
+		snlp.sq[i] = 0.0;
+	snlp.mem_x = 0.0;
+	snlp.mem_y = 0.0;
+	for(i=0; i<NLP_NTAP; i++)
+		snlp.mem_fir[i] = 0.0;
+
+	kiss.fft_alloc(snlp.fft_cfg, PE_FFT_SIZE, false);
+}
+
+/*---------------------------------------------------------------------------*\
+
+  nlp_destroy()
+
+  Shut down function for NLP pitch estimator.
+
+\*---------------------------------------------------------------------------*/
+
+void Cnlp::nlp_destroy()
+{
+	snlp.fft_cfg.twiddles.clear();
+}
+
+/*---------------------------------------------------------------------------*\
+
+  nlp()
+
+  Determines the pitch in samples using the Non Linear Pitch (NLP)
+  algorithm [1]. Returns the fundamental in Hz.  Note that the actual
+  pitch estimate is for the centre of the M sample Sn[] vector, not
+  the current N sample input vector.  This is (I think) a delay of 2.5
+  frames with N=80 samples.  You should align further analysis using
+  this pitch estimate to be centred on the middle of Sn[].
+
+  Two post processors have been tried, the MBE version (as discussed
+  in [1]), and a post processor that checks sub-multiples.  Both
+  suffer occasional gross pitch errors (i.e. neither are perfect).  In
+  the presence of background noise the sub-multiple algorithm tends
+  towards low F0 which leads to better sounding background noise than
+  the MBE post processor.
+
+  A good way to test and develop the NLP pitch estimator is using the
+  tnlp (codec2/unittest) and the codec2/octave/plnlp.m Octave script.
+
+  A pitch tracker searching a few frames forward and backward in time
+  would be a useful addition.
+
+  References:
+
+    [1] http://rowetel.com/downloads/1997_rowe_phd_thesis.pdf Chapter 4
+
+\*---------------------------------------------------------------------------*/
+
+float Cnlp::nlp(
+	float  Sn[],   /* input speech vector                                */
+	int    n,      /* frames shift (no. new samples in Sn[])             */
+	float *pitch,  /* estimated pitch period in samples at current Fs    */
+//	std::complex<float>   Sw[],   /* Freq domain version of Sn[]                        */
+//	float  W[],    /* Freq domain window                                 */
+	float *prev_f0 /* previous pitch f0 in Hz, memory for pitch tracking */
+)
+{
+	float  notch;		    /* current notch filter output          */
+	std::complex<float>   Fw[PE_FFT_SIZE]; /* DFT of squared signal (input/output) */
+	float  gmax;
+	int    gmax_bin;
+	int    m, i, j;
+	float  best_f0;
+
+	m = snlp.m;
+
+	/* Square, notch filter at DC, and LP filter vector */
+
+	/* If running at 16 kHz decimate to 8 kHz, as NLP ws designed for
+	   Fs = 8kHz. The decimating filter introduces about 3ms of delay,
+	   that shouldn't be a problem as pitch changes slowly. */
+
+	if (snlp.Fs == 8000)
+	{
+		/* Square latest input samples */
+
+		for(i=m-n; i<m; i++)
+		{
+			snlp.sq[i] = Sn[i]*Sn[i];
+		}
+	}
+	else
+	{
+		assert(snlp.Fs == 16000);
+
+		/* re-sample at 8 KHz */
+
+		for(i=0; i<n; i++)
+		{
+			snlp.Sn16k[FDMDV_OS_TAPS_16K+i] = Sn[m-n+i];
+		}
+
+		m /= 2;
+		n /= 2;
+
+		float Sn8k[n];
+		fdmdv_16_to_8(Sn8k, &snlp.Sn16k[FDMDV_OS_TAPS_16K], n);
+
+		/* Square latest input samples */
+
+		for(i=m-n, j=0; i<m; i++, j++)
+		{
+			snlp.sq[i] = Sn8k[j]*Sn8k[j];
+		}
+		assert(j <= n);
+	}
+
+	for(i=m-n; i<m; i++)  	/* notch filter at DC */
+	{
+		notch = snlp.sq[i] - snlp.mem_x;
+		notch += COEFF*snlp.mem_y;
+		snlp.mem_x = snlp.sq[i];
+		snlp.mem_y = notch;
+		snlp.sq[i] = notch + 1.0;  /* With 0 input vectors to codec,
+				      kiss_fft() would take a long
+				      time to execute when running in
+				      real time.  Problem was traced
+				      to kiss_fft function call in
+				      this function. Adding this small
+				      constant fixed problem.  Not
+				      exactly sure why. */
+	}
+
+	for(i=m-n; i<m; i++)  	/* FIR filter vector */
+	{
+
+		for(j=0; j<NLP_NTAP-1; j++)
+			snlp.mem_fir[j] = snlp.mem_fir[j+1];
+		snlp.mem_fir[NLP_NTAP-1] = snlp.sq[i];
+
+		snlp.sq[i] = 0.0;
+		for(j=0; j<NLP_NTAP; j++)
+			snlp.sq[i] += snlp.mem_fir[j]*nlp_fir[j];
+	}
+
+	/* Decimate and DFT */
+
+	for(i=0; i<PE_FFT_SIZE; i++)
+	{
+		Fw[i].real(0);
+		Fw[i].imag(0);
+	}
+	for(i=0; i<m/DEC; i++)
+	{
+		Fw[i].real(snlp.sq[i*DEC]*snlp.w[i]);
+	}
+
+	// FIXME: check if this can be converted to a real fft
+	// since all imag inputs are 0
+	codec2_fft_inplace(snlp.fft_cfg, Fw);
+
+	for(i=0; i<PE_FFT_SIZE; i++)
+		Fw[i].real(Fw[i].real() * Fw[i].real() + Fw[i].imag() * Fw[i].imag());
+
+	/* todo: express everything in f0, as pitch in samples is dep on Fs */
+
+	int pmin = floor(SAMPLE_RATE*P_MIN_S);
+	int pmax = floor(SAMPLE_RATE*P_MAX_S);
+
+	/* find global peak */
+
+	gmax = 0.0;
+	gmax_bin = PE_FFT_SIZE*DEC/pmax;
+	for(i=PE_FFT_SIZE*DEC/pmax; i<=PE_FFT_SIZE*DEC/pmin; i++)
+	{
+		if (Fw[i].real() > gmax)
+		{
+			gmax = Fw[i].real();
+			gmax_bin = i;
+		}
+	}
+
+	best_f0 = post_process_sub_multiples(Fw, pmax, gmax, gmax_bin, prev_f0);
+
+	/* Shift samples in buffer to make room for new samples */
+
+	for(i=0; i<m-n; i++)
+		snlp.sq[i] = snlp.sq[i+n];
+
+	/* return pitch period in samples and F0 estimate */
+
+	*pitch = (float)snlp.Fs/best_f0;
+
+	*prev_f0 = best_f0;
+
+	return(best_f0);
+}
+
+/*---------------------------------------------------------------------------*\
+
+  post_process_sub_multiples()
+
+  Given the global maximma of Fw[] we search integer submultiples for
+  local maxima.  If local maxima exist and they are above an
+  experimentally derived threshold (OK a magic number I pulled out of
+  the air) we choose the submultiple as the F0 estimate.
+
+  The rational for this is that the lowest frequency peak of Fw[]
+  should be F0, as Fw[] can be considered the autocorrelation function
+  of Sw[] (the speech spectrum).  However sometimes due to phase
+  effects the lowest frequency maxima may not be the global maxima.
+
+  This works OK in practice and favours low F0 values in the presence
+  of background noise which means the sinusoidal codec does an OK job
+  of synthesising the background noise.  High F0 in background noise
+  tends to sound more periodic introducing annoying artifacts.
+
+\*---------------------------------------------------------------------------*/
+
+float Cnlp::post_process_sub_multiples(std::complex<float> Fw[], int pmax, float gmax, int gmax_bin, float *prev_f0)
+{
+	int   min_bin, cmax_bin;
+	int   mult;
+	float thresh, best_f0;
+	int   b, bmin, bmax, lmax_bin;
+	float lmax;
+	int   prev_f0_bin;
+
+	/* post process estimate by searching submultiples */
+
+	mult = 2;
+	min_bin = PE_FFT_SIZE*DEC/pmax;
+	cmax_bin = gmax_bin;
+	prev_f0_bin = *prev_f0*(PE_FFT_SIZE*DEC)/SAMPLE_RATE;
+
+	while(gmax_bin/mult >= min_bin)
+	{
+
+		b = gmax_bin/mult;			/* determine search interval */
+		bmin = 0.8*b;
+		bmax = 1.2*b;
+		if (bmin < min_bin)
+			bmin = min_bin;
+
+		/* lower threshold to favour previous frames pitch estimate,
+		    this is a form of pitch tracking */
+
+		if ((prev_f0_bin > bmin) && (prev_f0_bin < bmax))
+			thresh = CNLP*0.5*gmax;
+		else
+			thresh = CNLP*gmax;
+
+		lmax = 0;
+		lmax_bin = bmin;
+		for (b=bmin; b<=bmax; b++) 	     /* look for maximum in interval */
+			if (Fw[b].real() > lmax)
+			{
+				lmax = Fw[b].real();
+				lmax_bin = b;
+			}
+
+		if (lmax > thresh)
+			if ((lmax > Fw[lmax_bin-1].real()) && (lmax > Fw[lmax_bin+1].real()))
+			{
+				cmax_bin = lmax_bin;
+			}
+
+		mult++;
+	}
+
+	best_f0 = (float)cmax_bin*SAMPLE_RATE/(PE_FFT_SIZE*DEC);
+
+	return best_f0;
+}
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: fdmdv_16_to_8()
+  AUTHOR......: David Rowe
+  DATE CREATED: 9 May 2012
+
+  Changes the sample rate of a signal from 16 to 8 kHz.
+
+  n is the number of samples at the 8 kHz rate, there are FDMDV_OS*n
+  samples at the 48 kHz rate.  As above however a memory of
+  FDMDV_OS_TAPS samples is reqd for in16k[] (see t16_8.c unit test as example).
+
+  Low pass filter the 16 kHz signal at 4 kHz using the same filter as
+  the upsampler, then just output every FDMDV_OS-th filtered sample.
+
+  Note: this function copied from fdmdv.c, included in nlp.c as a convenience
+  to avoid linking with another source file.
+
+\*---------------------------------------------------------------------------*/
+
+void Cnlp::fdmdv_16_to_8(float out8k[], float in16k[], int n)
+{
+	float acc;
+	int   i,j,k;
+
+	for(i=0, k=0; k<n; i+=FDMDV_OS, k++)
+	{
+		acc = 0.0;
+		for(j=0; j<FDMDV_OS_TAPS_16K; j++)
+			acc += fdmdv_os_filter[j]*in16k[i-j];
+		out8k[k] = acc;
+	}
+
+	/* update filter memory */
+
+	for(i=-FDMDV_OS_TAPS_16K; i<0; i++)
+		in16k[i] = in16k[i + n*FDMDV_OS];
+}
+
+// there is a little overhead for inplace kiss_fft but this is
+// on the powerful platforms like the Raspberry or even x86 PC based ones
+// not noticeable
+// the reduced usage of RAM and increased performance on STM32 platforms
+// should be worth it.
+void Cnlp::codec2_fft_inplace(FFT_STATE &cfg, std::complex<float> *inout)
+{
+	std::complex<float> in[512];
+	// decide whether to use the local stack based buffer for in
+	// or to allow kiss_fft to allocate RAM
+	// second part is just to play safe since first method
+	// is much faster and uses less RAM
+	if (cfg.nfft <= 512)
+	{
+		memcpy(in, inout, cfg.nfft*sizeof(std::complex<float>));
+		kiss.fft(cfg, in, inout);
+	}
+	else
+	{
+		kiss.fft(cfg, inout, inout);
+	}
+}
diff --git a/codec2/nlp.h b/codec2/nlp.h
new file mode 100644
index 0000000..e531a90
--- /dev/null
+++ b/codec2/nlp.h
@@ -0,0 +1,87 @@
+/*---------------------------------------------------------------------------*\
+
+  FILE........: nlp.c
+  AUTHOR......: David Rowe
+  DATE CREATED: 23/3/93
+
+  Non Linear Pitch (NLP) estimation functions.
+
+\*---------------------------------------------------------------------------*/
+
+/*
+  Copyright (C) 2009 David Rowe
+
+  All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License version 2.1, as
+  published by the Free Software Foundation.  This program is
+  distributed in the hope that it will be useful, but WITHOUT ANY
+  WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+  License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __NLP__
+#define __NLP__
+
+#include <complex>
+#include <vector>
+
+#include "defines.h"
+
+/*---------------------------------------------------------------------------*\
+
+ 				DEFINES
+
+\*---------------------------------------------------------------------------*/
+
+#define PMAX_M      320		/* maximum NLP analysis window size     */
+#define COEFF       0.95	/* notch filter parameter               */
+#define PE_FFT_SIZE 512		/* DFT size for pitch estimation        */
+#define DEC         5		/* decimation factor                    */
+#define SAMPLE_RATE 8000
+#define PI          3.141592654	/* mathematical constant                */
+#define T           0.1         /* threshold for local minima candidate */
+#define F0_MAX      500
+#define CNLP        0.3	        /* post processor constant              */
+#define NLP_NTAP 48	        /* Decimation LPF order */
+
+/* 8 to 16 kHz sample rate conversion */
+
+#define FDMDV_OS                 2                            /* oversampling rate                   */
+#define FDMDV_OS_TAPS_16K       48                            /* number of OS filter taps at 16kHz   */
+#define FDMDV_OS_TAPS_8K        (FDMDV_OS_TAPS_16K/FDMDV_OS)  /* number of OS filter taps at 8kHz    */
+
+
+using NLP = struct nlp_tag
+{
+	int           Fs;                /* sample rate in Hz            */
+	int           m;
+	float         w[PMAX_M/DEC];     /* DFT window                   */
+	float         sq[PMAX_M];	     /* squared speech samples       */
+	float         mem_x,mem_y;       /* memory for notch filter      */
+	float         mem_fir[NLP_NTAP]; /* decimation FIR filter memory */
+	FFT_STATE     fft_cfg;           /* kiss FFT config              */
+	std::vector<float> Sn16k;	     /* Fs=16kHz input speech vector */
+};
+
+
+class Cnlp {
+public:
+	void nlp_create(C2CONST *c2const);
+	void nlp_destroy();
+	float nlp(float Sn[], int n, float *pitch_samples, float *prev_f0);
+	void codec2_fft_inplace(FFT_STATE &cfg, std::complex<float> *inout);
+
+private:
+	float post_process_sub_multiples(std::complex<float> Fw[], int pmax, float gmax, int gmax_bin, float *prev_f0);
+	void fdmdv_16_to_8(float out8k[], float in16k[], int n);
+
+	NLP snlp;
+};
+
+#endif
diff --git a/codec2/pack.cpp b/codec2/pack.cpp
new file mode 100644
index 0000000..8382f95
--- /dev/null
+++ b/codec2/pack.cpp
@@ -0,0 +1,139 @@
+/*
+  Copyright (C) 2010 Perens LLC <bruce@perens.com>
+
+  All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License version 2.1, as
+  published by the Free Software Foundation.  This program is
+  distributed in the hope that it will be useful, but WITHOUT ANY
+  WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+  License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "defines.h"
+#include "quantise.h"
+#include <stdio.h>
+
+/* Compile-time constants */
+/* Size of unsigned char in bits. Assumes 8 bits-per-char. */
+static const unsigned int	WordSize = 8;
+
+/* Mask to pick the bit component out of bitIndex. */
+static const unsigned int	IndexMask = 0x7;
+
+/* Used to pick the word component out of bitIndex. */
+static const unsigned int	ShiftRight = 3;
+
+/** Pack a bit field into a bit string, encoding the field in Gray code.
+ *
+ * The output is an array of unsigned char data. The fields are efficiently
+ * packed into the bit string. The Gray coding is a naive attempt to reduce
+ * the effect of single-bit errors, we expect to do a better job as the
+ * codec develops.
+ *
+ * This code would be simpler if it just set one bit at a time in the string,
+ * but would hit the same cache line more often. I'm not sure the complexity
+ * gains us anything here.
+ *
+ * Although field is currently of int type rather than unsigned for
+ * compatibility with the rest of the code, indices are always expected to
+ * be >= 0.
+ */
+void CQuantize::pack(
+	unsigned char *bitArray,  /* The output bit string. */
+	unsigned int  *bitIndex,  /* Index into the string in BITS, not bytes.*/
+	int			   field,	  /* The bit field to be packed. */
+	unsigned int   fieldWidth /* Width of the field in BITS, not bytes. */
+)
+{
+	pack_natural_or_gray(bitArray, bitIndex, field, fieldWidth, 1);
+}
+
+void CQuantize::pack_natural_or_gray(
+	unsigned char *bitArray,   /* The output bit string. */
+	unsigned int  *bitIndex,   /* Index into the string in BITS, not bytes.*/
+	int			   field,	   /* The bit field to be packed. */
+	unsigned int   fieldWidth, /* Width of the field in BITS, not bytes. */
+	unsigned int   gray        /* non-zero for gray coding */
+)
+{
+	if (gray)
+	{
+		/* Convert the field to Gray code */
+		field = (field >> 1) ^ field;
+	}
+
+	do
+	{
+		unsigned int bI = *bitIndex;
+		unsigned int bitsLeft = WordSize - (bI & IndexMask);
+		unsigned int sliceWidth = bitsLeft < fieldWidth ? bitsLeft : fieldWidth;
+		unsigned int wordIndex = bI >> ShiftRight;
+
+		bitArray[wordIndex] |= ((unsigned char)((field >> (fieldWidth - sliceWidth)) << (bitsLeft - sliceWidth)));
+
+		*bitIndex = bI + sliceWidth;
+		fieldWidth -= sliceWidth;
+	}
+	while ( fieldWidth != 0 );
+}
+
+/** Unpack a field from a bit string, converting from Gray code to binary.
+ *
+ */
+int CQuantize::unpack(
+	const unsigned char *bitArray, /* The input bit string. */
+	unsigned int        *bitIndex, /* Index into the string in BITS, not bytes.*/
+	unsigned int		 fieldWidth/* Width of the field in BITS, not bytes. */
+)
+{
+	return unpack_natural_or_gray(bitArray, bitIndex, fieldWidth, 1);
+}
+
+/** Unpack a field from a bit string, to binary, optionally using
+ * natural or Gray code.
+ *
+ */
+int CQuantize::unpack_natural_or_gray(
+	const unsigned char *bitArray,  /* The input bit string. */
+	unsigned int        *bitIndex,  /* Index into the string in BITS, not bytes.*/
+	unsigned int		 fieldWidth,/* Width of the field in BITS, not bytes. */
+	unsigned int         gray       /* non-zero for Gray coding */
+)
+{
+	unsigned int	field = 0;
+	unsigned int	t;
+
+	do
+	{
+		unsigned int bI = *bitIndex;
+		unsigned int bitsLeft = WordSize - (bI & IndexMask);
+		unsigned int sliceWidth = bitsLeft < fieldWidth ? bitsLeft : fieldWidth;
+
+		field |= (((bitArray[bI >> ShiftRight] >> (bitsLeft - sliceWidth)) & ((1 << sliceWidth) - 1)) << (fieldWidth - sliceWidth));
+
+		*bitIndex = bI + sliceWidth;
+		fieldWidth -= sliceWidth;
+	}
+	while ( fieldWidth != 0 );
+
+	if (gray)
+	{
+		/* Convert from Gray code to binary. Works for maximum 8-bit fields. */
+		t = field ^ (field >> 8);
+		t ^= (t >> 4);
+		t ^= (t >> 2);
+		t ^= (t >> 1);
+	}
+	else
+	{
+		t = field;
+	}
+
+	return t;
+}
diff --git a/codec2/qbase.cpp b/codec2/qbase.cpp
new file mode 100644
index 0000000..a94ef46
--- /dev/null
+++ b/codec2/qbase.cpp
@@ -0,0 +1,247 @@
+#include <assert.h>
+#include <math.h>
+
+#include "qbase.h"
+
+/*---------------------------------------------------------------------------*\
+
+  quantise
+
+  Quantises vec by choosing the nearest vector in codebook cb, and
+  returns the vector index.  The squared error of the quantised vector
+  is added to se.
+
+\*---------------------------------------------------------------------------*/
+
+long CQbase::quantise(const float *cb, float vec[], float w[], int k, int m, float *se)
+/* float   cb[][K];	current VQ codebook       */
+/* float   vec[];	vector to quantise        */
+/* float   w[];     weighting vector          */
+/* int	   k;		dimension of vectors      */
+/* int     m;		size of codebook          */
+/* float   *se;		accumulated squared error */
+{
+	float   e;			/* current error		*/
+	long	   besti;	/* best index so far	*/
+	float   beste;		/* best error so far	*/
+	long	   j;
+	int     i;
+	float   diff;
+
+	besti = 0;
+	beste = 1E32;
+	for(j=0; j<m; j++)
+	{
+		e = 0.0;
+		for(i=0; i<k; i++)
+		{
+			diff = cb[j*k+i]-vec[i];
+			e += (diff*w[i] * diff*w[i]);
+		}
+		if (e < beste)
+		{
+			beste = e;
+			besti = j;
+		}
+	}
+
+	*se += beste;
+
+	return(besti);
+}
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: encode_WoE()
+  AUTHOR......: Jean-Marc Valin & David Rowe
+  DATE CREATED: 11 May 2012
+
+  Joint Wo and LPC energy vector quantiser developed my Jean-Marc
+  Valin.  Returns index, and updated states xq[].
+
+\*---------------------------------------------------------------------------*/
+
+int CQbase::encode_WoE(MODEL *model, float e, float xq[])
+{
+	int          i, n1;
+	float        x[2];
+	float        err[2];
+	float        w[2];
+	const float *codebook1 = ge_cb[0].cb;
+	int          nb_entries = ge_cb[0].m;
+	int          ndim = ge_cb[0].k;
+
+	assert((1<<WO_E_BITS) == nb_entries);
+
+	if (e < 0.0) e = 0;  /* occasional small negative energies due LPC round off I guess */
+
+	x[0] = log10f((model->Wo/PI)*4000.0/50.0)/log10f(2);
+	x[1] = 10.0*log10f(1e-4 + e);
+
+	compute_weights2(x, xq, w);
+	for (i=0; i<ndim; i++)
+		err[i] = x[i]-ge_coeff[i]*xq[i];
+	n1 = find_nearest_weighted(codebook1, nb_entries, err, w, ndim);
+
+	for (i=0; i<ndim; i++)
+	{
+		xq[i] = ge_coeff[i]*xq[i] + codebook1[ndim*n1+i];
+		err[i] -= codebook1[ndim*n1+i];
+	}
+
+	//printf("enc: %f %f (%f)(%f) \n", xq[0], xq[1], e, 10.0*log10(1e-4 + e));
+	return n1;
+}
+
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: decode_WoE()
+  AUTHOR......: Jean-Marc Valin & David Rowe
+  DATE CREATED: 11 May 2012
+
+  Joint Wo and LPC energy vector quantiser developed my Jean-Marc
+  Valin.  Given index and states xq[], returns Wo & E, and updates
+  states xq[].
+
+\*---------------------------------------------------------------------------*/
+
+void CQbase::decode_WoE(C2CONST *c2const, MODEL *model, float *e, float xq[], int n1)
+{
+	int          i;
+	const float *codebook1 = ge_cb[0].cb;
+	int          ndim = ge_cb[0].k;
+	float Wo_min = c2const->Wo_min;
+	float Wo_max = c2const->Wo_max;
+
+	for (i=0; i<ndim; i++)
+	{
+		xq[i] = ge_coeff[i]*xq[i] + codebook1[ndim*n1+i];
+	}
+
+	//printf("dec: %f %f\n", xq[0], xq[1]);
+	model->Wo = powf(2.0, xq[0])*(PI*50.0)/4000.0;
+
+	/* bit errors can make us go out of range leading to all sorts of
+	   probs like seg faults */
+
+	if (model->Wo > Wo_max) model->Wo = Wo_max;
+	if (model->Wo < Wo_min) model->Wo = Wo_min;
+
+	model->L  = PI/model->Wo; /* if we quantise Wo re-compute L */
+
+	*e = exp10f(xq[1]/10.0);
+}
+
+void CQbase::compute_weights2(const float *x, const float *xp, float *w)
+{
+	w[0] = 30;
+	w[1] = 1;
+	if (x[1]<0)
+	{
+		w[0] *= .6;
+		w[1] *= .3;
+	}
+	if (x[1]<-10)
+	{
+		w[0] *= .3;
+		w[1] *= .3;
+	}
+	/* Higher weight if pitch is stable */
+	if (fabsf(x[0]-xp[0])<.2)
+	{
+		w[0] *= 2;
+		w[1] *= 1.5;
+	}
+	else if (fabsf(x[0]-xp[0])>.5)   /* Lower if not stable */
+	{
+		w[0] *= .5;
+	}
+
+	/* Lower weight for low energy */
+	if (x[1] < xp[1]-10)
+	{
+		w[1] *= .5;
+	}
+	if (x[1] < xp[1]-20)
+	{
+		w[1] *= .5;
+	}
+
+	//w[0] = 30;
+	//w[1] = 1;
+
+	/* Square the weights because it's applied on the squared error */
+	w[0] *= w[0];
+	w[1] *= w[1];
+
+}
+
+int CQbase::find_nearest_weighted(const float *codebook, int nb_entries, float *x, const float *w, int ndim)
+{
+	int i, j;
+	float min_dist = 1e15;
+	int nearest = 0;
+
+	for (i=0; i<nb_entries; i++)
+	{
+		float dist=0;
+		for (j=0; j<ndim; j++)
+			dist += w[j]*(x[j]-codebook[i*ndim+j])*(x[j]-codebook[i*ndim+j]);
+		if (dist<min_dist)
+		{
+			min_dist = dist;
+			nearest = i;
+		}
+	}
+	return nearest;
+}
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: encode_log_Wo()
+  AUTHOR......: David Rowe
+  DATE CREATED: 22/8/2010
+
+  Encodes Wo in the log domain using a WO_LEVELS quantiser.
+
+\*---------------------------------------------------------------------------*/
+
+int CQbase::encode_log_Wo(C2CONST *c2const, float Wo, int bits)
+{
+	int   index, Wo_levels = 1<<bits;
+	float Wo_min = c2const->Wo_min;
+	float Wo_max = c2const->Wo_max;
+	float norm;
+
+	norm = (log10f(Wo) - log10f(Wo_min))/(log10f(Wo_max) - log10f(Wo_min));
+	index = floorf(Wo_levels * norm + 0.5);
+	if (index < 0 ) index = 0;
+	if (index > (Wo_levels-1)) index = Wo_levels-1;
+
+	return index;
+}
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: decode_log_Wo()
+  AUTHOR......: David Rowe
+  DATE CREATED: 22/8/2010
+
+  Decodes Wo using a WO_LEVELS quantiser in the log domain.
+
+\*---------------------------------------------------------------------------*/
+
+float CQbase::decode_log_Wo(C2CONST *c2const, int index, int bits)
+{
+	float Wo_min = c2const->Wo_min;
+	float Wo_max = c2const->Wo_max;
+	float step;
+	float Wo;
+	int   Wo_levels = 1<<bits;
+
+	step = (log10f(Wo_max) - log10f(Wo_min))/Wo_levels;
+	Wo   = log10f(Wo_min) + step*(index);
+
+	return exp10f(Wo);
+}
diff --git a/codec2/qbase.h b/codec2/qbase.h
new file mode 100644
index 0000000..be65949
--- /dev/null
+++ b/codec2/qbase.h
@@ -0,0 +1,39 @@
+#ifndef QBASE_H
+#define QBASE_H
+
+#include "defines.h"
+
+#define WO_BITS     7
+#define WO_LEVELS   (1<<WO_BITS)
+#define WO_DT_BITS  3
+
+#define E_BITS      5
+#define E_LEVELS    (1<<E_BITS)
+#define E_MIN_DB   -10.0
+#define E_MAX_DB    40.0
+
+#define LSP_SCALAR_INDEXES    10
+#define LSPD_SCALAR_INDEXES    10
+#define LSP_PRED_VQ_INDEXES    3
+
+#define WO_E_BITS   8
+
+#define LPCPF_GAMMA 0.5
+#define LPCPF_BETA  0.2
+
+class CQbase {
+public:
+	int encode_WoE(MODEL *model, float e, float xq[]);
+	void decode_WoE(C2CONST *c2const, MODEL *model, float *e, float xq[], int n1);
+	int encode_log_Wo(C2CONST *c2const, float Wo, int bits);
+	float decode_log_Wo(C2CONST *c2const, int index, int bits);
+protected:
+	long quantise(const float * cb, float vec[], float w[], int k, int m, float *se);
+	void compute_weights2(const float *x, const float *xp, float *w);
+	int find_nearest_weighted(const float *codebook, int nb_entries, float *x, const float *w, int ndim);
+
+	const float ge_coeff[2] = { 0.8, 0.9 };
+
+};
+
+#endif
diff --git a/codec2/quantise.cpp b/codec2/quantise.cpp
new file mode 100644
index 0000000..dd3e4ef
--- /dev/null
+++ b/codec2/quantise.cpp
@@ -0,0 +1,897 @@
+/*---------------------------------------------------------------------------*\
+
+  FILE........: quantise.c
+  AUTHOR......: David Rowe
+  DATE CREATED: 31/5/92
+
+  Quantisation functions for the sinusoidal coder.
+
+\*---------------------------------------------------------------------------*/
+
+/*
+  All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License version 2.1, as
+  published by the Free Software Foundation.  This program is
+  distributed in the hope that it will be useful, but WITHOUT ANY
+  WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+  License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+*/
+
+#include <assert.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "defines.h"
+#include "quantise.h"
+#include "lpc.h"
+#include "kiss_fft.h"
+
+extern CKissFFT kiss;
+
+#define LSP_DELTA1 0.01         /* grid spacing for LSP root searches */
+
+/*---------------------------------------------------------------------------*\
+
+                             FUNCTIONS
+
+\*---------------------------------------------------------------------------*/
+
+int CQuantize::lsp_bits(int i)
+{
+	return lsp_cb[i].log2m;
+}
+
+int CQuantize::lspd_bits(int i)
+{
+	return lsp_cbd[i].log2m;
+}
+
+
+
+/*---------------------------------------------------------------------------*\
+
+  encode_lspds_scalar()
+
+  Scalar/VQ LSP difference quantiser.
+
+\*---------------------------------------------------------------------------*/
+
+void CQuantize::encode_lspds_scalar(int indexes[], float lsp[], int order)
+{
+	int   i,k,m;
+	float lsp_hz[order];
+	float lsp__hz[order];
+	float dlsp[order];
+	float dlsp_[order];
+	float wt[order];
+	const float *cb;
+	float se;
+
+	for(i=0; i<order; i++)
+	{
+		wt[i] = 1.0;
+	}
+
+	/* convert from radians to Hz so we can use human readable
+	   frequencies */
+
+	for(i=0; i<order; i++)
+		lsp_hz[i] = (4000.0/PI)*lsp[i];
+
+	wt[0] = 1.0;
+	for(i=0; i<order; i++)
+	{
+
+		/* find difference from previous qunatised lsp */
+
+		if (i)
+			dlsp[i] = lsp_hz[i] - lsp__hz[i-1];
+		else
+			dlsp[0] = lsp_hz[0];
+
+		k = lsp_cbd[i].k;
+		m = lsp_cbd[i].m;
+		cb = lsp_cbd[i].cb;
+		indexes[i] = quantise(cb, &dlsp[i], wt, k, m, &se);
+		dlsp_[i] = cb[indexes[i]*k];
+
+
+		if (i)
+			lsp__hz[i] = lsp__hz[i-1] + dlsp_[i];
+		else
+			lsp__hz[0] = dlsp_[0];
+	}
+
+}
+
+
+void CQuantize::decode_lspds_scalar( float lsp_[], int indexes[], int   order)
+{
+	int   i,k;
+	float lsp__hz[order];
+	float dlsp_[order];
+	const float *cb;
+
+	for(i=0; i<order; i++)
+	{
+
+		k = lsp_cbd[i].k;
+		cb = lsp_cbd[i].cb;
+		dlsp_[i] = cb[indexes[i]*k];
+
+		if (i)
+			lsp__hz[i] = lsp__hz[i-1] + dlsp_[i];
+		else
+			lsp__hz[0] = dlsp_[0];
+
+		lsp_[i] = (PI/4000.0)*lsp__hz[i];
+	}
+
+}
+
+#define MIN(a,b) ((a)<(b)?(a):(b))
+#define MAX_ENTRIES 16384
+
+void CQuantize::compute_weights(const float *x, float *w, int ndim)
+{
+	int i;
+	w[0] = MIN(x[0], x[1]-x[0]);
+	for (i=1; i<ndim-1; i++)
+		w[i] = MIN(x[i]-x[i-1], x[i+1]-x[i]);
+	w[ndim-1] = MIN(x[ndim-1]-x[ndim-2], PI-x[ndim-1]);
+
+	for (i=0; i<ndim; i++)
+		w[i] = 1./(.01+w[i]);
+}
+
+int CQuantize::find_nearest(const float *codebook, int nb_entries, float *x, int ndim)
+{
+	int i, j;
+	float min_dist = 1e15;
+	int nearest = 0;
+
+	for (i=0; i<nb_entries; i++)
+	{
+		float dist=0;
+		for (j=0; j<ndim; j++)
+			dist += (x[j]-codebook[i*ndim+j])*(x[j]-codebook[i*ndim+j]);
+		if (dist<min_dist)
+		{
+			min_dist = dist;
+			nearest = i;
+		}
+	}
+	return nearest;
+}
+
+int CQuantize::check_lsp_order(float lsp[], int order)
+{
+	int   i;
+	float tmp;
+	int   swaps = 0;
+
+	for(i=1; i<order; i++)
+		if (lsp[i] < lsp[i-1])
+		{
+			//fprintf(stderr, "swap %d\n",i);
+			swaps++;
+			tmp = lsp[i-1];
+			lsp[i-1] = lsp[i]-0.1;
+			lsp[i] = tmp+0.1;
+			i = 1; /* start check again, as swap may have caused out of order */
+		}
+
+	return swaps;
+}
+
+
+/*---------------------------------------------------------------------------*\
+
+   lpc_post_filter()
+
+   Applies a post filter to the LPC synthesis filter power spectrum
+   Pw, which supresses the inter-formant energy.
+
+   The algorithm is from p267 (Section 8.6) of "Digital Speech",
+   edited by A.M. Kondoz, 1994 published by Wiley and Sons.  Chapter 8
+   of this text is on the MBE vocoder, and this is a freq domain
+   adaptation of post filtering commonly used in CELP.
+
+   I used the Octave simulation lpcpf.m to get an understanding of the
+   algorithm.
+
+   Requires two more FFTs which is significantly more MIPs.  However
+   it should be possible to implement this more efficiently in the
+   time domain.  Just not sure how to handle relative time delays
+   between the synthesis stage and updating these coeffs.  A smaller
+   FFT size might also be accetable to save CPU.
+
+   TODO:
+   [ ] sync var names between Octave and C version
+   [ ] doc gain normalisation
+   [ ] I think the first FFT is not rqd as we do the same
+       thing in aks_to_M2().
+
+\*---------------------------------------------------------------------------*/
+
+void CQuantize::lpc_post_filter(FFTR_STATE *fftr_fwd_cfg, float Pw[], float ak[], int order, float beta, float gamma, int bass_boost, float E)
+{
+	int   i;
+	float x[FFT_ENC];   /* input to FFTs                */
+	std::complex<float>  Ww[FFT_ENC/2+1];  /* weighting spectrum           */
+	float Rw[FFT_ENC/2+1];  /* R = WA                       */
+	float e_before, e_after, gain;
+	float Pfw;
+	float max_Rw, min_Rw;
+	float coeff;
+
+	/* Determine weighting filter spectrum W(exp(jw)) ---------------*/
+
+	for(i=0; i<FFT_ENC; i++)
+	{
+		x[i] = 0.0;
+	}
+
+	x[0]  = ak[0];
+	coeff = gamma;
+	for(i=1; i<=order; i++)
+	{
+		x[i] = ak[i] * coeff;
+		coeff *= gamma;
+	}
+	kiss.fftr(*fftr_fwd_cfg, x, Ww);
+
+	for(i=0; i<FFT_ENC/2; i++)
+	{
+		Ww[i].real(Ww[i].real() * Ww[i].real() + Ww[i].imag() * Ww[i].imag());
+	}
+
+	/* Determined combined filter R = WA ---------------------------*/
+
+	max_Rw = 0.0;
+	min_Rw = 1E32;
+	for(i=0; i<FFT_ENC/2; i++)
+	{
+		Rw[i] = sqrtf(Ww[i].real() * Pw[i]);
+		if (Rw[i] > max_Rw)
+			max_Rw = Rw[i];
+		if (Rw[i] < min_Rw)
+			min_Rw = Rw[i];
+
+	}
+
+	/* create post filter mag spectrum and apply ------------------*/
+
+	/* measure energy before post filtering */
+
+	e_before = 1E-4;
+	for(i=0; i<FFT_ENC/2; i++)
+		e_before += Pw[i];
+
+	/* apply post filter and measure energy  */
+
+
+	e_after = 1E-4;
+	for(i=0; i<FFT_ENC/2; i++)
+	{
+		Pfw = powf(Rw[i], beta);
+		Pw[i] *= Pfw * Pfw;
+		e_after += Pw[i];
+	}
+	gain = e_before/e_after;
+
+	/* apply gain factor to normalise energy, and LPC Energy */
+
+	gain *= E;
+	for(i=0; i<FFT_ENC/2; i++)
+	{
+		Pw[i] *= gain;
+	}
+
+	if (bass_boost)
+	{
+		/* add 3dB to first 1 kHz to account for LP effect of PF */
+
+		for(i=0; i<FFT_ENC/8; i++)
+		{
+			Pw[i] *= 1.4*1.4;
+		}
+	}
+}
+
+
+/*---------------------------------------------------------------------------*\
+
+   aks_to_M2()
+
+   Transforms the linear prediction coefficients to spectral amplitude
+   samples.  This function determines A(m) from the average energy per
+   band using an FFT.
+
+\*---------------------------------------------------------------------------*/
+
+void CQuantize::aks_to_M2(
+	FFTR_STATE * fftr_fwd_cfg,
+	float         ak[],	     /* LPC's */
+	int           order,
+	MODEL        *model,	   /* sinusoidal model parameters for this frame */
+	float         E,	       /* energy term */
+	float        *snr,	       /* signal to noise ratio for this frame in dB */
+	int           sim_pf,      /* true to simulate a post filter */
+	int           pf,          /* true to enable actual LPC post filter */
+	int           bass_boost,  /* enable LPC filter 0-1kHz 3dB boost */
+	float         beta,
+	float         gamma,       /* LPC post filter parameters */
+	std::complex<float>          Aw[]         /* output power spectrum */
+)
+{
+	int i,m;		/* loop variables */
+	int am,bm;		/* limits of current band */
+	float r;		/* no. rads/bin */
+	float Em;		/* energy in band */
+	float Am;		/* spectral amplitude sample */
+	float signal, noise;
+
+	r = TWO_PI/(FFT_ENC);
+
+	/* Determine DFT of A(exp(jw)) --------------------------------------------*/
+	{
+		float a[FFT_ENC];  /* input to FFT for power spectrum */
+
+		for(i=0; i<FFT_ENC; i++)
+		{
+			a[i] = 0.0;
+		}
+
+		for(i=0; i<=order; i++)
+			a[i] = ak[i];
+		kiss.fftr(*fftr_fwd_cfg, a, Aw);
+	}
+
+	/* Determine power spectrum P(w) = E/(A(exp(jw))^2 ------------------------*/
+
+	float Pw[FFT_ENC/2];
+
+	for(i=0; i<FFT_ENC/2; i++)
+	{
+		Pw[i] = 1.0/(Aw[i].real() * Aw[i].real() + Aw[i].imag() * Aw[i].imag() + 1E-6);
+	}
+
+	if (pf)
+		lpc_post_filter(fftr_fwd_cfg, Pw, ak, order, beta, gamma, bass_boost, E);
+	else
+	{
+		for(i=0; i<FFT_ENC/2; i++)
+		{
+			Pw[i] *= E;
+		}
+	}
+
+	/* Determine magnitudes from P(w) ----------------------------------------*/
+
+	/* when used just by decoder {A} might be all zeroes so init signal
+	   and noise to prevent log(0) errors */
+
+	signal = 1E-30;
+	noise = 1E-32;
+
+	for(m=1; m<=model->L; m++)
+	{
+		am = (int)((m - 0.5)*model->Wo/r + 0.5);
+		bm = (int)((m + 0.5)*model->Wo/r + 0.5);
+
+		// FIXME: With arm_rfft_fast_f32 we have to use this
+		// otherwise sometimes a to high bm is calculated
+		// which causes trouble later in the calculation
+		// chain
+		// it seems for some reason model->Wo is calculated somewhat too high
+		if (bm>FFT_ENC/2)
+		{
+			bm = FFT_ENC/2;
+		}
+		Em = 0.0;
+
+		for(i=am; i<bm; i++)
+			Em += Pw[i];
+		Am = sqrtf(Em);
+
+		signal += model->A[m]*model->A[m];
+		noise  += (model->A[m] - Am)*(model->A[m] - Am);
+
+		/* This code significantly improves perf of LPC model, in
+		   particular when combined with phase0.  The LPC spectrum tends
+		   to track just under the peaks of the spectral envelope, and
+		   just above nulls.  This algorithm does the reverse to
+		   compensate - raising the amplitudes of spectral peaks, while
+		   attenuating the null.  This enhances the formants, and
+		   supresses the energy between formants. */
+
+		if (sim_pf)
+		{
+			if (Am > model->A[m])
+				Am *= 0.7;
+			if (Am < model->A[m])
+				Am *= 1.4;
+		}
+		model->A[m] = Am;
+	}
+	*snr = 10.0*log10f(signal/noise);
+}
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: encode_Wo()
+  AUTHOR......: David Rowe
+  DATE CREATED: 22/8/2010
+
+  Encodes Wo using a WO_LEVELS quantiser.
+
+\*---------------------------------------------------------------------------*/
+
+int CQuantize::encode_Wo(C2CONST *c2const, float Wo, int bits)
+{
+	int   index, Wo_levels = 1<<bits;
+	float Wo_min = c2const->Wo_min;
+	float Wo_max = c2const->Wo_max;
+	float norm;
+
+	norm = (Wo - Wo_min)/(Wo_max - Wo_min);
+	index = floorf(Wo_levels * norm + 0.5);
+	if (index < 0 ) index = 0;
+	if (index > (Wo_levels-1)) index = Wo_levels-1;
+
+	return index;
+}
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: decode_Wo()
+  AUTHOR......: David Rowe
+  DATE CREATED: 22/8/2010
+
+  Decodes Wo using a WO_LEVELS quantiser.
+
+\*---------------------------------------------------------------------------*/
+
+float CQuantize::decode_Wo(C2CONST *c2const, int index, int bits)
+{
+	float Wo_min = c2const->Wo_min;
+	float Wo_max = c2const->Wo_max;
+	float step;
+	float Wo;
+	int   Wo_levels = 1<<bits;
+
+	step = (Wo_max - Wo_min)/Wo_levels;
+	Wo   = Wo_min + step*(index);
+
+	return Wo;
+}
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: speech_to_uq_lsps()
+  AUTHOR......: David Rowe
+  DATE CREATED: 22/8/2010
+
+  Analyse a windowed frame of time domain speech to determine LPCs
+  which are the converted to LSPs for quantisation and transmission
+  over the channel.
+
+\*---------------------------------------------------------------------------*/
+
+float CQuantize::speech_to_uq_lsps(float lsp[], float ak[], float Sn[], float w[], int m_pitch, int order)
+{
+	int   i, roots;
+	float Wn[m_pitch];
+	float R[order+1];
+	float e, E;
+	Clpc lpc;
+
+	e = 0.0;
+	for(i=0; i<m_pitch; i++)
+	{
+		Wn[i] = Sn[i]*w[i];
+		e += Wn[i]*Wn[i];
+	}
+
+	/* trap 0 energy case as LPC analysis will fail */
+
+	if (e == 0.0)
+	{
+		for(i=0; i<order; i++)
+			lsp[i] = (PI/order)*(float)i;
+		return 0.0;
+	}
+
+	lpc.autocorrelate(Wn, R, m_pitch, order);
+	lpc.levinson_durbin(R, ak, order);
+
+	E = 0.0;
+	for(i=0; i<=order; i++)
+		E += ak[i]*R[i];
+
+	/* 15 Hz BW expansion as I can't hear the difference and it may help
+	   help occasional fails in the LSP root finding.  Important to do this
+	   after energy calculation to avoid -ve energy values.
+	*/
+
+	for(i=0; i<=order; i++)
+		ak[i] *= powf(0.994,(float)i);
+
+	roots = lpc_to_lsp(ak, order, lsp, 5, LSP_DELTA1);
+	if (roots != order)
+	{
+		/* if root finding fails use some benign LSP values instead */
+		for(i=0; i<order; i++)
+			lsp[i] = (PI/order)*(float)i;
+	}
+
+	return E;
+}
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: encode_lsps_scalar()
+  AUTHOR......: David Rowe
+  DATE CREATED: 22/8/2010
+
+  Thirty-six bit sclar LSP quantiser. From a vector of unquantised
+  (floating point) LSPs finds the quantised LSP indexes.
+
+\*---------------------------------------------------------------------------*/
+
+void CQuantize::encode_lsps_scalar(int indexes[], float lsp[], int order)
+{
+	int    i,k,m;
+	float  wt[1];
+	float  lsp_hz[order];
+	const float *cb;
+	float se;
+
+	/* convert from radians to Hz so we can use human readable
+	   frequencies */
+
+	for(i=0; i<order; i++)
+		lsp_hz[i] = (4000.0/PI)*lsp[i];
+
+	/* scalar quantisers */
+
+	wt[0] = 1.0;
+	for(i=0; i<order; i++)
+	{
+		k = lsp_cb[i].k;
+		m = lsp_cb[i].m;
+		cb = lsp_cb[i].cb;
+		indexes[i] = quantise(cb, &lsp_hz[i], wt, k, m, &se);
+	}
+}
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: decode_lsps_scalar()
+  AUTHOR......: David Rowe
+  DATE CREATED: 22/8/2010
+
+  From a vector of quantised LSP indexes, returns the quantised
+  (floating point) LSPs.
+
+\*---------------------------------------------------------------------------*/
+
+void CQuantize::decode_lsps_scalar(float lsp[], int indexes[], int order)
+{
+	int    i,k;
+	float  lsp_hz[order];
+	const float *cb;
+
+	for(i=0; i<order; i++)
+	{
+		k = lsp_cb[i].k;
+		cb = lsp_cb[i].cb;
+		lsp_hz[i] = cb[indexes[i]*k];
+	}
+
+	/* convert back to radians */
+
+	for(i=0; i<order; i++)
+		lsp[i] = (PI/4000.0)*lsp_hz[i];
+}
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: bw_expand_lsps()
+  AUTHOR......: David Rowe
+  DATE CREATED: 22/8/2010
+
+  Applies Bandwidth Expansion (BW) to a vector of LSPs.  Prevents any
+  two LSPs getting too close together after quantisation.  We know
+  from experiment that LSP quantisation errors < 12.5Hz (25Hz step
+  size) are inaudible so we use that as the minimum LSP separation.
+
+\*---------------------------------------------------------------------------*/
+
+void CQuantize::bw_expand_lsps(float lsp[], int order, float min_sep_low, float min_sep_high)
+{
+	int i;
+
+	for(i=1; i<4; i++)
+	{
+
+		if ((lsp[i] - lsp[i-1]) < min_sep_low*(PI/4000.0))
+			lsp[i] = lsp[i-1] + min_sep_low*(PI/4000.0);
+
+	}
+
+	/* As quantiser gaps increased, larger BW expansion was required
+	   to prevent twinkly noises.  This may need more experiment for
+	   different quanstisers.
+	*/
+
+	for(i=4; i<order; i++)
+	{
+		if (lsp[i] - lsp[i-1] < min_sep_high*(PI/4000.0))
+			lsp[i] = lsp[i-1] + min_sep_high*(PI/4000.0);
+	}
+}
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: apply_lpc_correction()
+  AUTHOR......: David Rowe
+  DATE CREATED: 22/8/2010
+
+  Apply first harmonic LPC correction at decoder.  This helps improve
+  low pitch males after LPC modelling, like hts1a and morig.
+
+\*---------------------------------------------------------------------------*/
+
+void CQuantize::apply_lpc_correction(MODEL *model)
+{
+	if (model->Wo < (PI*150.0/4000))
+	{
+		model->A[1] *= 0.032;
+	}
+}
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: encode_energy()
+  AUTHOR......: David Rowe
+  DATE CREATED: 22/8/2010
+
+  Encodes LPC energy using an E_LEVELS quantiser.
+
+\*---------------------------------------------------------------------------*/
+
+int CQuantize::encode_energy(float e, int bits)
+{
+	int   index, e_levels = 1<<bits;
+	float e_min = E_MIN_DB;
+	float e_max = E_MAX_DB;
+	float norm;
+
+	e = 10.0*log10f(e);
+	norm = (e - e_min)/(e_max - e_min);
+	index = floorf(e_levels * norm + 0.5);
+	if (index < 0 ) index = 0;
+	if (index > (e_levels-1)) index = e_levels-1;
+
+	return index;
+}
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: decode_energy()
+  AUTHOR......: David Rowe
+  DATE CREATED: 22/8/2010
+
+  Decodes energy using a E_LEVELS quantiser.
+
+\*---------------------------------------------------------------------------*/
+
+float CQuantize::decode_energy(int index, int bits)
+{
+	float e_min = E_MIN_DB;
+	float e_max = E_MAX_DB;
+	float step;
+	float e;
+	int   e_levels = 1<<bits;
+
+	step = (e_max - e_min)/e_levels;
+	e    = e_min + step*(index);
+	e    = exp10f(e/10.0);
+
+	return e;
+}
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: lpc_to_lsp()
+  AUTHOR......: David Rowe
+  DATE CREATED: 24/2/93
+
+  This function converts LPC coefficients to LSP coefficients.
+
+\*---------------------------------------------------------------------------*/
+
+int CQuantize::lpc_to_lsp(float *a, int order, float *freq, int nb, float delta)
+/*  float *a 		     	lpc coefficients			*/
+/*  int order			order of LPC coefficients (10) 		*/
+/*  float *freq 	      	LSP frequencies in radians      	*/
+/*  int nb			number of sub-intervals (4) 		*/
+/*  float delta			grid spacing interval (0.02) 		*/
+{
+	float psuml,psumr,psumm,temp_xr,xl,xr,xm = 0;
+	float temp_psumr;
+	int i,j,m,flag,k;
+	float *px;                	/* ptrs of respective P'(z) & Q'(z)	*/
+	float *qx;
+	float *p;
+	float *q;
+	float *pt;                	/* ptr used for cheb_poly_eval()
+				   whether P' or Q' 			*/
+	int roots=0;              	/* number of roots found 	        */
+	float Q[order + 1];
+	float P[order + 1];
+
+	flag = 1;
+	m = order/2;            	/* order of P'(z) & Q'(z) polynimials 	*/
+
+	/* Allocate memory space for polynomials */
+
+	/* determine P'(z)'s and Q'(z)'s coefficients where
+	  P'(z) = P(z)/(1 + z^(-1)) and Q'(z) = Q(z)/(1-z^(-1)) */
+
+	px = P;                      /* initilaise ptrs */
+	qx = Q;
+	p = px;
+	q = qx;
+	*px++ = 1.0;
+	*qx++ = 1.0;
+	for(i=1; i<=m; i++)
+	{
+		*px++ = a[i]+a[order+1-i]-*p++;
+		*qx++ = a[i]-a[order+1-i]+*q++;
+	}
+	px = P;
+	qx = Q;
+	for(i=0; i<m; i++)
+	{
+		*px = 2**px;
+		*qx = 2**qx;
+		px++;
+		qx++;
+	}
+	px = P;             	/* re-initialise ptrs 			*/
+	qx = Q;
+
+	/* Search for a zero in P'(z) polynomial first and then alternate to Q'(z).
+	Keep alternating between the two polynomials as each zero is found 	*/
+
+	xr = 0;             	/* initialise xr to zero 		*/
+	xl = 1.0;               	/* start at point xl = 1 		*/
+
+
+	for(j=0; j<order; j++)
+	{
+		if(j%2)            	/* determines whether P' or Q' is eval. */
+			pt = qx;
+		else
+			pt = px;
+
+		psuml = cheb_poly_eva(pt,xl,order);	/* evals poly. at xl 	*/
+		flag = 1;
+		while(flag && (xr >= -1.0))
+		{
+			xr = xl - delta ;                  	/* interval spacing 	*/
+			psumr = cheb_poly_eva(pt,xr,order);/* poly(xl-delta_x) 	*/
+			temp_psumr = psumr;
+			temp_xr = xr;
+
+			/* if no sign change increment xr and re-evaluate
+			   poly(xr). Repeat til sign change.  if a sign change has
+			   occurred the interval is bisected and then checked again
+			   for a sign change which determines in which interval the
+			   zero lies in.  If there is no sign change between poly(xm)
+			   and poly(xl) set interval between xm and xr else set
+			   interval between xl and xr and repeat till root is located
+			   within the specified limits  */
+
+			if(((psumr*psuml)<0.0) || (psumr == 0.0))
+			{
+				roots++;
+
+				psumm=psuml;
+				for(k=0; k<=nb; k++)
+				{
+					xm = (xl+xr)/2;        	/* bisect the interval 	*/
+					psumm=cheb_poly_eva(pt,xm,order);
+					if(psumm*psuml>0.)
+					{
+						psuml=psumm;
+						xl=xm;
+					}
+					else
+					{
+						psumr=psumm;
+						xr=xm;
+					}
+				}
+
+				/* once zero is found, reset initial interval to xr 	*/
+				freq[j] = (xm);
+				xl = xm;
+				flag = 0;       		/* reset flag for next search 	*/
+			}
+			else
+			{
+				psuml=temp_psumr;
+				xl=temp_xr;
+			}
+		}
+	}
+
+	/* convert from x domain to radians */
+
+	for(i=0; i<order; i++)
+	{
+		freq[i] = acosf(freq[i]);
+	}
+
+	return(roots);
+}
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: cheb_poly_eva()
+  AUTHOR......: David Rowe
+  DATE CREATED: 24/2/93
+
+  This function evalutes a series of chebyshev polynomials
+
+  FIXME: performing memory allocation at run time is very inefficient,
+  replace with stack variables of MAX_P size.
+
+\*---------------------------------------------------------------------------*/
+
+float CQuantize::cheb_poly_eva(float *coef,float x,int order)
+/*  float coef[]  	coefficients of the polynomial to be evaluated 	*/
+/*  float x   		the point where polynomial is to be evaluated 	*/
+/*  int order 		order of the polynomial 			*/
+{
+	int i;
+	float *t,*u,*v,sum;
+	float T[(order / 2) + 1];
+
+	/* Initialise pointers */
+
+	t = T;                          	/* T[i-2] 			*/
+	*t++ = 1.0;
+	u = t--;                        	/* T[i-1] 			*/
+	*u++ = x;
+	v = u--;                        	/* T[i] 			*/
+
+	/* Evaluate chebyshev series formulation using iterative approach 	*/
+
+	for(i=2; i<=order/2; i++)
+		*v++ = (2*x)*(*u++) - *t++;  	/* T[i] = 2*x*T[i-1] - T[i-2]	*/
+
+	sum=0.0;                        	/* initialise sum to zero 	*/
+	t = T;                          	/* reset pointer 		*/
+
+	/* Evaluate polynomial and return value also free memory space */
+
+	for(i=0; i<=order/2; i++)
+		sum+=coef[(order/2)-i]**t++;
+
+	return sum;
+}
diff --git a/codec2/quantise.h b/codec2/quantise.h
new file mode 100644
index 0000000..58453af
--- /dev/null
+++ b/codec2/quantise.h
@@ -0,0 +1,68 @@
+/*---------------------------------------------------------------------------*\
+
+  FILE........: quantise.h
+  AUTHOR......: David Rowe
+  DATE CREATED: 31/5/92
+
+  Quantisation functions for the sinusoidal coder.
+
+\*---------------------------------------------------------------------------*/
+
+/*
+  All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License version 2.1, as
+  published by the Free Software Foundation.  This program is
+  distributed in the hope that it will be useful, but WITHOUT ANY
+  WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+  License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __QUANTISE__
+#define __QUANTISE__
+
+#include <complex>
+
+#include "qbase.h"
+
+class CQuantize : public CQbase {
+public:
+	void aks_to_M2(FFTR_STATE *fftr_fwd_cfg, float ak[], int order, MODEL *model, float E, float *snr, int sim_pf, int pf, int bass_boost, float beta, float gamma, std::complex<float> Aw[]);
+
+	int   encode_Wo(C2CONST *c2const, float Wo, int bits);
+	float decode_Wo(C2CONST *c2const, int index, int bits);
+	void  encode_lsps_scalar(int indexes[], float lsp[], int order);
+	void  decode_lsps_scalar(float lsp[], int indexes[], int order);
+	void  encode_lspds_scalar(int indexes[], float lsp[], int order);
+	void  decode_lspds_scalar(float lsp[], int indexes[], int order);
+
+	int encode_energy(float e, int bits);
+	float decode_energy(int index, int bits);
+
+	void pack(unsigned char * bits, unsigned int *nbit, int index, unsigned int index_bits);
+	void pack_natural_or_gray(unsigned char * bits, unsigned int *nbit, int index, unsigned int index_bits, unsigned int gray);
+	int  unpack(const unsigned char * bits, unsigned int *nbit, unsigned int index_bits);
+	int  unpack_natural_or_gray(const unsigned char * bits, unsigned int *nbit, unsigned int index_bits, unsigned int gray);
+
+	int lsp_bits(int i);
+	int lspd_bits(int i);
+
+	void apply_lpc_correction(MODEL *model);
+	float speech_to_uq_lsps(float lsp[], float ak[], float Sn[], float w[], int m_pitch, int order);
+	int check_lsp_order(float lsp[], int lpc_order);
+	void bw_expand_lsps(float lsp[], int order, float min_sep_low, float min_sep_high);
+
+private:
+	void compute_weights(const float *x, float *w, int ndim);
+	int find_nearest(const float *codebook, int nb_entries, float *x, int ndim);
+	void lpc_post_filter(FFTR_STATE *fftr_fwd_cfg, float Pw[], float ak[], int order, float beta, float gamma, int bass_boost, float E);
+	int lpc_to_lsp (float *a, int lpcrdr, float *freq, int nb, float delta);
+	float cheb_poly_eva(float *coef,float x,int order);
+};
+
+#endif