summary | refs | log | tree | commit | diff
path: root/src/libspeex/ltp_sse.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/libspeex/ltp_sse.h')
-rw-r--r-- src/libspeex/ltp_sse.h 92
1 files changed, 92 insertions, 0 deletions
diff --git a/src/libspeex/ltp_sse.h b/src/libspeex/ltp_sse.h
new file mode 100644
index 0000000..bed6eaa
--- /dev/null
+++ b/src/libspeex/ltp_sse.h
@@ -0,0 +1,92 @@
+/* Copyright (C) 2002 Jean-Marc Valin */
+/**
+ @file ltp_sse.h
+ @brief Long-Term Prediction functions (SSE version)
+*/
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ - Neither the name of the Xiph.org Foundation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <xmmintrin.h>
+
+/* Defined alongside the SSE inner_prod() below; presumably lets the including
+   file skip its generic implementation (the consumer is not visible here). */
+#define OVERRIDE_INNER_PROD
+/**
+ * Inner product of two float vectors (SSE version).
+ *
+ * Both inputs are read with unaligned loads, so no particular alignment is
+ * required.
+ *
+ * @param a   First vector, at least len elements.
+ * @param b   Second vector, at least len elements.
+ * @param len Number of elements to multiply and accumulate. Values <= 0
+ *            yield 0.
+ * @return    Sum of a[k]*b[k] for k in [0, len).
+ *
+ * When len is a multiple of 8 the sequence of SSE operations is the same as
+ * in the previous implementation. Other lengths used to read past the end of
+ * the arrays (len>>2 odd) and/or ignore the last len%4 elements; they are now
+ * handled exactly.
+ */
+float inner_prod(const float *a, const float *b, int len)
+{
+   int i;
+   float ret;
+   __m128 sum = _mm_setzero_ps();
+   /* Main loop: two 4-float vectors per pass, only while 8 elements remain */
+   for (i=0;len-i>=8;i+=8)
+   {
+      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i)));
+      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i+4), _mm_loadu_ps(b+i+4)));
+   }
+   /* At most one whole vector can be left over */
+   if (len-i>=4)
+   {
+      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i)));
+      i += 4;
+   }
+   /* Horizontal add: fold the upper pair onto the lower pair, then add
+      lane 1 into lane 0 */
+   sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
+   sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
+   _mm_store_ss(&ret, sum);
+   /* Scalar tail for the last len%4 elements */
+   for (;i<len;i++)
+      ret += a[i]*b[i];
+   return ret;
+}
+
+/* Defined alongside the SSE pitch_xcorr() below; presumably lets the including
+   file skip its generic implementation (the consumer is not visible here). */
+#define OVERRIDE_PITCH_XCORR
+/**
+ * Cross-correlation of _x with successive shifts of _y (SSE version).
+ *
+ * With N = len>>2 and L = nb_pitch>>2, for every lag in [0, 4*L) this stores
+ *    corr[nb_pitch-1-lag] = sum of _x[k]*_y[k+lag] for k in [0, 4*N)
+ * Only whole groups of four are processed: the last len%4 samples do not
+ * contribute and, when nb_pitch is not a multiple of 4, the lowest
+ * nb_pitch%4 entries of corr are left unwritten.
+ *
+ * @param _x       First signal; 4*N floats are read (unaligned loads).
+ * @param _y       Second signal; floats [0, 4*(N+L)-2] are read.
+ * @param corr     Output array, indexed as described above.
+ * @param len      Number of samples correlated (rounded down to a multiple
+ *                 of 4).
+ * @param nb_pitch Number of lags (rounded down to a multiple of 4).
+ * @param stack    Not referenced directly here; presumably the scratch area
+ *                 used by the VARDECL/ALLOC macros (defined elsewhere) --
+ *                 confirm against their definition.
+ */
+void pitch_xcorr(const float *_x, const float *_y, float *corr, int len, int nb_pitch, char *stack)
+{
+   int i, offset;
+   VARDECL(__m128 *x);
+   VARDECL(__m128 *y);
+   int N, L;
+   N = len>>2;
+   L = nb_pitch>>2;
+   ALLOC(x, N, __m128);
+   ALLOC(y, N+L, __m128);
+   /* x is packed once; it is the same for every lag */
+   for (i=0;i<N;i++)
+      x[i] = _mm_loadu_ps(_x+(i<<2));
+   /* Each pass handles the lags congruent to offset modulo 4 by re-packing
+      _y shifted by offset samples */
+   for (offset=0;offset<4;offset++)
+   {
+      /* The dot products below touch y[0..N+L-2] only, so the last vector is
+         not loaded; loading it would read four floats of _y that no
+         correlation uses */
+      for (i=0;i<N+L-1;i++)
+         y[i] = _mm_loadu_ps(_y+(i<<2)+offset);
+      for (i=0;i<L;i++)
+      {
+         int j;
+         __m128 sum, *xx, *yy;
+         sum = _mm_setzero_ps();
+         yy = y+i;
+         xx = x;
+         /* Two vectors per pass while a full pair remains */
+         for (j=0;j+1<N;j+=2)
+         {
+            sum = _mm_add_ps(sum, _mm_mul_ps(xx[0], yy[0]));
+            sum = _mm_add_ps(sum, _mm_mul_ps(xx[1], yy[1]));
+            xx += 2;
+            yy += 2;
+         }
+         /* Odd N: one vector is left; reading xx[1] here would run past x */
+         if (N&1)
+            sum = _mm_add_ps(sum, _mm_mul_ps(xx[0], yy[0]));
+         /* Horizontal add of the four partial sums into lane 0 */
+         sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
+         sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
+         /* lag = 4*i+offset; results are stored in reverse lag order */
+         _mm_store_ss(corr+nb_pitch-1-(i<<2)-offset, sum);
+      }
+   }
+}