diff options
Diffstat (limited to 'src/libspeex/vbr.c')
-rw-r--r-- | src/libspeex/vbr.c | 275 |
1 files changed, 275 insertions, 0 deletions
diff --git a/src/libspeex/vbr.c b/src/libspeex/vbr.c new file mode 100644 index 00000000..5b7dd9bf --- /dev/null +++ b/src/libspeex/vbr.c @@ -0,0 +1,275 @@ +/* Copyright (C) 2002 Jean-Marc Valin + File: vbr.c + + VBR-related routines + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + - Neither the name of the Xiph.org Foundation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "vbr.h" +#include <math.h> + + +#define sqr(x) ((x)*(x)) + +#define MIN_ENERGY 6000 +#define NOISE_POW .3 + +#ifndef DISABLE_VBR + +const float vbr_nb_thresh[9][11]={ + {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /* CNG */ + { 4.0f, 2.5f, 2.0f, 1.2f, 0.5f, 0.0f, -0.5f, -0.7f, -0.8f, -0.9f, -1.0f}, /* 2 kbps */ + {10.0f, 6.5f, 5.2f, 4.5f, 3.9f, 3.5f, 3.0f, 2.5f, 2.3f, 1.8f, 1.0f}, /* 6 kbps */ + {11.0f, 8.8f, 7.5f, 6.5f, 5.0f, 3.9f, 3.9f, 3.9f, 3.5f, 3.0f, 1.0f}, /* 8 kbps */ + {11.0f, 11.0f, 9.9f, 8.5f, 7.0f, 6.0f, 4.5f, 4.0f, 4.0f, 4.0f, 2.0f}, /* 11 kbps */ + {11.0f, 11.0f, 11.0f, 11.0f, 9.5f, 8.5f, 8.0f, 7.0f, 6.0f, 5.0f, 3.0f}, /* 15 kbps */ + {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 9.5f, 8.5f, 7.0f, 6.0f, 5.0f}, /* 18 kbps */ + {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 9.8f, 9.5f, 7.5f}, /* 24 kbps */ + { 7.0f, 4.5f, 3.7f, 3.0f, 2.5f, 2.0f, 1.8f, 1.5f, 1.0f, 0.0f, 0.0f} /* 4 kbps */ +}; + + +const float vbr_hb_thresh[5][11]={ + {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /* silence */ + {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /* 2 kbps */ + {11.0f, 11.0f, 9.5f, 8.5f, 7.5f, 6.0f, 5.0f, 3.9f, 3.0f, 2.0f, 1.0f}, /* 6 kbps */ + {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 9.5f, 8.7f, 7.8f, 7.0f, 6.5f, 4.0f}, /* 10 kbps */ + {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 9.8f, 7.5f, 5.5f} /* 18 kbps */ +}; + +const float vbr_uhb_thresh[2][11]={ + {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /* silence */ + { 3.9f, 2.5f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -1.0f} /* 2 kbps */ +}; + +void vbr_init(VBRState *vbr) +{ + int i; + + vbr->average_energy=0; + vbr->last_energy=1; + vbr->accum_sum=0; + vbr->energy_alpha=.1; + vbr->soft_pitch=0; + vbr->last_pitch_coef=0; + vbr->last_quality=0; + + vbr->noise_accum = .05*pow(MIN_ENERGY, NOISE_POW); + vbr->noise_accum_count=.05; + vbr->noise_level=vbr->noise_accum/vbr->noise_accum_count; + vbr->consec_noise=0; + + + for (i=0;i<VBR_MEMORY_SIZE;i++) + vbr->last_log_energy[i] = log(MIN_ENERGY); +} + + +/* + This function should analyse the signal and decide how critical the + coding error will be perceptually. The following factors should be + taken into account: + + -Attacks (positive energy derivative) should be coded with more bits + + -Stationary voiced segments should receive more bits + + -Segments with (very) low absolute energy should receive less bits (maybe + only shaped noise?) + + -DTX for near-zero energy? + + -Stationary fricative segments should have less bits + + -Temporal masking: when energy slope is decreasing, decrease the bit-rate + + -Decrease bit-rate for males (low pitch)? + + -(wideband only) less bits in the high-band when signal is very + non-stationary (harder to notice high-frequency noise)??? + +*/ + +float vbr_analysis(VBRState *vbr, spx_word16_t *sig, int len, int pitch, float pitch_coef) +{ + int i; + float ener=0, ener1=0, ener2=0; + float qual=7; + int va; + float log_energy; + float non_st=0; + float voicing; + float pow_ener; + + for (i=0;i<len>>1;i++) + ener1 += ((float)sig[i])*sig[i]; + + for (i=len>>1;i<len;i++) + ener2 += ((float)sig[i])*sig[i]; + ener=ener1+ener2; + + log_energy = log(ener+MIN_ENERGY); + for (i=0;i<VBR_MEMORY_SIZE;i++) + non_st += sqr(log_energy-vbr->last_log_energy[i]); + non_st = non_st/(30*VBR_MEMORY_SIZE); + if (non_st>1) + non_st=1; + + voicing = 3*(pitch_coef-.4)*fabs(pitch_coef-.4); + vbr->average_energy = (1-vbr->energy_alpha)*vbr->average_energy + vbr->energy_alpha*ener; + vbr->noise_level=vbr->noise_accum/vbr->noise_accum_count; + pow_ener = pow(ener,NOISE_POW); + if (vbr->noise_accum_count<.06 && ener>MIN_ENERGY) + vbr->noise_accum = .05*pow_ener; + + if ((voicing<.3 && non_st < .2 && pow_ener < 1.2*vbr->noise_level) + || (voicing<.3 && non_st < .05 && pow_ener < 1.5*vbr->noise_level) + || (voicing<.4 && non_st < .05 && pow_ener < 1.2*vbr->noise_level) + || (voicing<0 && non_st < .05)) + { + float tmp; + va = 0; + vbr->consec_noise++; + if (pow_ener > 3*vbr->noise_level) + tmp = 3*vbr->noise_level; + else + tmp = pow_ener; + if (vbr->consec_noise>=4) + { + vbr->noise_accum = .95*vbr->noise_accum + .05*tmp; + vbr->noise_accum_count = .95*vbr->noise_accum_count + .05; + } + } else { + va = 1; + vbr->consec_noise=0; + } + + if (pow_ener < vbr->noise_level && ener>MIN_ENERGY) + { + vbr->noise_accum = .95*vbr->noise_accum + .05*pow_ener; + vbr->noise_accum_count = .95*vbr->noise_accum_count + .05; + } + + /* Checking for very low absolute energy */ + if (ener < 30000) + { + qual -= .7; + if (ener < 10000) + qual-=.7; + if (ener < 3000) + qual-=.7; + } else { + float short_diff, long_diff; + short_diff = log((ener+1)/(1+vbr->last_energy)); + long_diff = log((ener+1)/(1+vbr->average_energy)); + /*fprintf (stderr, "%f %f\n", short_diff, long_diff);*/ + + if (long_diff<-5) + long_diff=-5; + if (long_diff>2) + long_diff=2; + + if (long_diff>0) + qual += .6*long_diff; + if (long_diff<0) + qual += .5*long_diff; + if (short_diff>0) + { + if (short_diff>5) + short_diff=5; + qual += .5*short_diff; + } + /* Checking for energy increases */ + if (ener2 > 1.6*ener1) + qual += .5; + } + vbr->last_energy = ener; + vbr->soft_pitch = .6*vbr->soft_pitch + .4*pitch_coef; + qual += 2.2*((pitch_coef-.4) + (vbr->soft_pitch-.4)); + + if (qual < vbr->last_quality) + qual = .5*qual + .5*vbr->last_quality; + if (qual<4) + qual=4; + if (qual>10) + qual=10; + + /* + if (vbr->consec_noise>=2) + qual-=1.3; + if (vbr->consec_noise>=5) + qual-=1.3; + if (vbr->consec_noise>=12) + qual-=1.3; + */ + if (vbr->consec_noise>=3) + qual=4; + + if (vbr->consec_noise) + qual -= 1.0 * (log(3.0 + vbr->consec_noise)-log(3)); + if (qual<0) + qual=0; + + if (ener<60000) + { + if (vbr->consec_noise>2) + qual-=0.5*(log(3.0 + vbr->consec_noise)-log(3)); + if (ener<10000&&vbr->consec_noise>2) + qual-=0.5*(log(3.0 + vbr->consec_noise)-log(3)); + if (qual<0) + qual=0; + qual += .3*log(.0001+ener/60000.0); + } + if (qual<-1) + qual=-1; + + /*printf ("%f %f %f %f %d\n", qual, voicing, non_st, pow_ener/(.01+vbr->noise_level), va);*/ + + vbr->last_pitch_coef = pitch_coef; + vbr->last_quality = qual; + + for (i=VBR_MEMORY_SIZE-1;i>0;i--) + vbr->last_log_energy[i] = vbr->last_log_energy[i-1]; + vbr->last_log_energy[0] = log_energy; + + /*printf ("VBR: %f %f %f %d %f\n", (float)(log_energy-log(vbr->average_energy+MIN_ENERGY)), non_st, voicing, va, vbr->noise_level);*/ + + return qual; +} + +void vbr_destroy(VBRState *vbr) +{ +} + +#endif /* #ifndef DISABLE_VBR */ |