parent
599a1a2256
commit
794e0957a4
|
@ -0,0 +1,67 @@
|
|||
--- configure.ac
|
||||
+++ configure.ac
|
||||
@@ -195,7 +195,7 @@ else
|
||||
src/dotprod/src/dotprod_rrrf.av.o \
|
||||
src/dotprod/src/dotprod_crcf.av.o \
|
||||
src/dotprod/src/sumsq.o"
|
||||
- ARCH_OPTION="-fno-common -faltivec";;
|
||||
+ ARCH_OPTION="-fno-common -maltivec";;
|
||||
armv1*|armv2*|armv3*|armv4*|armv5*|armv6*)
|
||||
# assume neon instructions are NOT available
|
||||
MLIBS_DOTPROD="src/dotprod/src/dotprod_cccf.o \
|
||||
--- src/dotprod/src/dotprod_crcf.av.c
|
||||
+++ src/dotprod/src/dotprod_crcf.av.c
|
||||
@@ -26,6 +26,7 @@
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
+#include <altivec.h>
|
||||
|
||||
#include "liquid.internal.h"
|
||||
|
||||
@@ -179,7 +180,7 @@ void dotprod_crcf_execute(dotprod_crcf _q,
|
||||
// split into four vectors each with four 32-bit
|
||||
// partial sums. Effectively each loop iteration
|
||||
// operates on 16 input samples at a time.
|
||||
- s0 = s1 = s2 = s3 = (vector float)(0);
|
||||
+ s0 = s1 = s2 = s3 = (vector float){0.0f, 0.0f, 0.0f, 0.0f};
|
||||
while (nblocks >= 4) {
|
||||
s0 = vec_madd(ar[nblocks-1],d[nblocks-1],s0);
|
||||
s1 = vec_madd(ar[nblocks-2],d[nblocks-2],s1);
|
||||
@@ -200,7 +201,7 @@ void dotprod_crcf_execute(dotprod_crcf _q,
|
||||
// move the result into the union s (effetively,
|
||||
// this loads the four 32-bit values in s0 into
|
||||
// the array w).
|
||||
- s.v = vec_add(s0,(vector float)(0));
|
||||
+ s.v = vec_add(s0,(vector float){0.0f, 0.0f, 0.0f, 0.0f});
|
||||
|
||||
// sum the resulting array
|
||||
//*_r = s.w[0] + s.w[1] + s.w[2] + s.w[3];
|
||||
--- src/dotprod/src/dotprod_rrrf.av.c
|
||||
+++ src/dotprod/src/dotprod_rrrf.av.c
|
||||
@@ -26,6 +26,7 @@
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
+#include <altivec.h>
|
||||
|
||||
#include "liquid.internal.h"
|
||||
|
||||
@@ -173,7 +174,7 @@ void dotprod_rrrf_execute(dotprod_rrrf _q,
|
||||
// split into four vectors each with four 32-bit
|
||||
// partial sums. Effectively each loop iteration
|
||||
// operates on 16 input samples at a time.
|
||||
- s0 = s1 = s2 = s3 = (vector float)(0);
|
||||
+ s0 = s1 = s2 = s3 = (vector float){0.0f, 0.0f, 0.0f, 0.0f};
|
||||
while (nblocks >= 4) {
|
||||
s0 = vec_madd(ar[nblocks-1],d[nblocks-1],s0);
|
||||
s1 = vec_madd(ar[nblocks-2],d[nblocks-2],s1);
|
||||
@@ -194,7 +195,7 @@ void dotprod_rrrf_execute(dotprod_rrrf _q,
|
||||
// move the result into the union s (effetively,
|
||||
// this loads the four 32-bit values in s0 into
|
||||
// the array w).
|
||||
- s.v = vec_add(s0,(vector float)(0));
|
||||
+ s.v = vec_add(s0,(vector float){0.0f, 0.0f, 0.0f, 0.0f});
|
||||
|
||||
// sum the resulting array
|
||||
*_r = s.w[0] + s.w[1] + s.w[2] + s.w[3];
|
Loading…
Reference in New Issue