summaryrefslogtreecommitdiff
path: root/src/libspeex/vbr.c
diff options
context:
space:
mode:
authorMikko Tiusanen <ams@daug.net>2014-05-04 01:18:52 +0300
committerMikko Tiusanen <ams@daug.net>2014-05-04 01:18:52 +0300
commit01beb9919b95479d8be040bec74abc5cc67a5e43 (patch)
tree65f0b79e793848491832756a4c3a32b23668fab3 /src/libspeex/vbr.c
parent191d731da136b7ee959a17e63111c9146219a768 (diff)
Initial import.
Diffstat (limited to 'src/libspeex/vbr.c')
-rw-r--r--src/libspeex/vbr.c275
1 files changed, 275 insertions, 0 deletions
diff --git a/src/libspeex/vbr.c b/src/libspeex/vbr.c
new file mode 100644
index 0000000..5b7dd9b
--- /dev/null
+++ b/src/libspeex/vbr.c
@@ -0,0 +1,275 @@
+/* Copyright (C) 2002 Jean-Marc Valin
+ File: vbr.c
+
+ VBR-related routines
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ - Neither the name of the Xiph.org Foundation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "vbr.h"
+#include <math.h>
+
+
+#define sqr(x) ((x)*(x))
+
+#define MIN_ENERGY 6000
+#define NOISE_POW .3
+
+#ifndef DISABLE_VBR
+
+const float vbr_nb_thresh[9][11]={
+ {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /* CNG */
+ { 4.0f, 2.5f, 2.0f, 1.2f, 0.5f, 0.0f, -0.5f, -0.7f, -0.8f, -0.9f, -1.0f}, /* 2 kbps */
+ {10.0f, 6.5f, 5.2f, 4.5f, 3.9f, 3.5f, 3.0f, 2.5f, 2.3f, 1.8f, 1.0f}, /* 6 kbps */
+ {11.0f, 8.8f, 7.5f, 6.5f, 5.0f, 3.9f, 3.9f, 3.9f, 3.5f, 3.0f, 1.0f}, /* 8 kbps */
+ {11.0f, 11.0f, 9.9f, 8.5f, 7.0f, 6.0f, 4.5f, 4.0f, 4.0f, 4.0f, 2.0f}, /* 11 kbps */
+ {11.0f, 11.0f, 11.0f, 11.0f, 9.5f, 8.5f, 8.0f, 7.0f, 6.0f, 5.0f, 3.0f}, /* 15 kbps */
+ {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 9.5f, 8.5f, 7.0f, 6.0f, 5.0f}, /* 18 kbps */
+ {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 9.8f, 9.5f, 7.5f}, /* 24 kbps */
+ { 7.0f, 4.5f, 3.7f, 3.0f, 2.5f, 2.0f, 1.8f, 1.5f, 1.0f, 0.0f, 0.0f} /* 4 kbps */
+};
+
+
+const float vbr_hb_thresh[5][11]={
+ {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /* silence */
+ {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /* 2 kbps */
+ {11.0f, 11.0f, 9.5f, 8.5f, 7.5f, 6.0f, 5.0f, 3.9f, 3.0f, 2.0f, 1.0f}, /* 6 kbps */
+ {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 9.5f, 8.7f, 7.8f, 7.0f, 6.5f, 4.0f}, /* 10 kbps */
+ {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 9.8f, 7.5f, 5.5f} /* 18 kbps */
+};
+
+const float vbr_uhb_thresh[2][11]={
+ {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /* silence */
+ { 3.9f, 2.5f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -1.0f} /* 2 kbps */
+};
+
+void vbr_init(VBRState *vbr)
+{
+ int i;
+
+ vbr->average_energy=0;
+ vbr->last_energy=1;
+ vbr->accum_sum=0;
+ vbr->energy_alpha=.1;
+ vbr->soft_pitch=0;
+ vbr->last_pitch_coef=0;
+ vbr->last_quality=0;
+
+ vbr->noise_accum = .05*pow(MIN_ENERGY, NOISE_POW);
+ vbr->noise_accum_count=.05;
+ vbr->noise_level=vbr->noise_accum/vbr->noise_accum_count;
+ vbr->consec_noise=0;
+
+
+ for (i=0;i<VBR_MEMORY_SIZE;i++)
+ vbr->last_log_energy[i] = log(MIN_ENERGY);
+}
+
+
+/*
+ This function should analyse the signal and decide how critical the
+ coding error will be perceptually. The following factors should be
+ taken into account:
+
+ -Attacks (positive energy derivative) should be coded with more bits
+
+ -Stationary voiced segments should receive more bits
+
+ -Segments with (very) low absolute energy should receive less bits (maybe
+ only shaped noise?)
+
+ -DTX for near-zero energy?
+
+ -Stationary fricative segments should have less bits
+
+ -Temporal masking: when energy slope is decreasing, decrease the bit-rate
+
+ -Decrease bit-rate for males (low pitch)?
+
+ -(wideband only) less bits in the high-band when signal is very
+ non-stationary (harder to notice high-frequency noise)???
+
+*/
+
+float vbr_analysis(VBRState *vbr, spx_word16_t *sig, int len, int pitch, float pitch_coef)
+{
+ int i;
+ float ener=0, ener1=0, ener2=0;
+ float qual=7;
+ int va;
+ float log_energy;
+ float non_st=0;
+ float voicing;
+ float pow_ener;
+
+ for (i=0;i<len>>1;i++)
+ ener1 += ((float)sig[i])*sig[i];
+
+ for (i=len>>1;i<len;i++)
+ ener2 += ((float)sig[i])*sig[i];
+ ener=ener1+ener2;
+
+ log_energy = log(ener+MIN_ENERGY);
+ for (i=0;i<VBR_MEMORY_SIZE;i++)
+ non_st += sqr(log_energy-vbr->last_log_energy[i]);
+ non_st = non_st/(30*VBR_MEMORY_SIZE);
+ if (non_st>1)
+ non_st=1;
+
+ voicing = 3*(pitch_coef-.4)*fabs(pitch_coef-.4);
+ vbr->average_energy = (1-vbr->energy_alpha)*vbr->average_energy + vbr->energy_alpha*ener;
+ vbr->noise_level=vbr->noise_accum/vbr->noise_accum_count;
+ pow_ener = pow(ener,NOISE_POW);
+ if (vbr->noise_accum_count<.06 && ener>MIN_ENERGY)
+ vbr->noise_accum = .05*pow_ener;
+
+ if ((voicing<.3 && non_st < .2 && pow_ener < 1.2*vbr->noise_level)
+ || (voicing<.3 && non_st < .05 && pow_ener < 1.5*vbr->noise_level)
+ || (voicing<.4 && non_st < .05 && pow_ener < 1.2*vbr->noise_level)
+ || (voicing<0 && non_st < .05))
+ {
+ float tmp;
+ va = 0;
+ vbr->consec_noise++;
+ if (pow_ener > 3*vbr->noise_level)
+ tmp = 3*vbr->noise_level;
+ else
+ tmp = pow_ener;
+ if (vbr->consec_noise>=4)
+ {
+ vbr->noise_accum = .95*vbr->noise_accum + .05*tmp;
+ vbr->noise_accum_count = .95*vbr->noise_accum_count + .05;
+ }
+ } else {
+ va = 1;
+ vbr->consec_noise=0;
+ }
+
+ if (pow_ener < vbr->noise_level && ener>MIN_ENERGY)
+ {
+ vbr->noise_accum = .95*vbr->noise_accum + .05*pow_ener;
+ vbr->noise_accum_count = .95*vbr->noise_accum_count + .05;
+ }
+
+ /* Checking for very low absolute energy */
+ if (ener < 30000)
+ {
+ qual -= .7;
+ if (ener < 10000)
+ qual-=.7;
+ if (ener < 3000)
+ qual-=.7;
+ } else {
+ float short_diff, long_diff;
+ short_diff = log((ener+1)/(1+vbr->last_energy));
+ long_diff = log((ener+1)/(1+vbr->average_energy));
+ /*fprintf (stderr, "%f %f\n", short_diff, long_diff);*/
+
+ if (long_diff<-5)
+ long_diff=-5;
+ if (long_diff>2)
+ long_diff=2;
+
+ if (long_diff>0)
+ qual += .6*long_diff;
+ if (long_diff<0)
+ qual += .5*long_diff;
+ if (short_diff>0)
+ {
+ if (short_diff>5)
+ short_diff=5;
+ qual += .5*short_diff;
+ }
+ /* Checking for energy increases */
+ if (ener2 > 1.6*ener1)
+ qual += .5;
+ }
+ vbr->last_energy = ener;
+ vbr->soft_pitch = .6*vbr->soft_pitch + .4*pitch_coef;
+ qual += 2.2*((pitch_coef-.4) + (vbr->soft_pitch-.4));
+
+ if (qual < vbr->last_quality)
+ qual = .5*qual + .5*vbr->last_quality;
+ if (qual<4)
+ qual=4;
+ if (qual>10)
+ qual=10;
+
+ /*
+ if (vbr->consec_noise>=2)
+ qual-=1.3;
+ if (vbr->consec_noise>=5)
+ qual-=1.3;
+ if (vbr->consec_noise>=12)
+ qual-=1.3;
+ */
+ if (vbr->consec_noise>=3)
+ qual=4;
+
+ if (vbr->consec_noise)
+ qual -= 1.0 * (log(3.0 + vbr->consec_noise)-log(3));
+ if (qual<0)
+ qual=0;
+
+ if (ener<60000)
+ {
+ if (vbr->consec_noise>2)
+ qual-=0.5*(log(3.0 + vbr->consec_noise)-log(3));
+ if (ener<10000&&vbr->consec_noise>2)
+ qual-=0.5*(log(3.0 + vbr->consec_noise)-log(3));
+ if (qual<0)
+ qual=0;
+ qual += .3*log(.0001+ener/60000.0);
+ }
+ if (qual<-1)
+ qual=-1;
+
+ /*printf ("%f %f %f %f %d\n", qual, voicing, non_st, pow_ener/(.01+vbr->noise_level), va);*/
+
+ vbr->last_pitch_coef = pitch_coef;
+ vbr->last_quality = qual;
+
+ for (i=VBR_MEMORY_SIZE-1;i>0;i--)
+ vbr->last_log_energy[i] = vbr->last_log_energy[i-1];
+ vbr->last_log_energy[0] = log_energy;
+
+ /*printf ("VBR: %f %f %f %d %f\n", (float)(log_energy-log(vbr->average_energy+MIN_ENERGY)), non_st, voicing, va, vbr->noise_level);*/
+
+ return qual;
+}
+
+void vbr_destroy(VBRState *vbr)
+{
+}
+
+#endif /* #ifndef DISABLE_VBR */