Discussion:
[Pixman] [PATCH] Adding infrastructure to permit future AVX2 implementations
raghuveer devulapalli
2018-08-22 17:02:02 UTC
Permalink
---
configure.ac | 44 ++++++++++++++++++++++++++++++++++++++++++++
pixman/Makefile.am | 12 ++++++++++++
pixman/pixman-avx2.c | 32 ++++++++++++++++++++++++++++++++
pixman/pixman-private.h | 5 +++++
pixman/pixman-x86.c | 15 +++++++++++++--
5 files changed, 106 insertions(+), 2 deletions(-)
create mode 100644 pixman/pixman-avx2.c

diff --git a/configure.ac b/configure.ac
index e833e45..27f4305 100644
--- a/configure.ac
+++ b/configure.ac
@@ -503,6 +503,48 @@ fi
AM_CONDITIONAL(USE_SSSE3, test $have_ssse3_intrinsics = yes)

dnl ===========================================================================
+dnl Check for AVX2
+
+if test "x$AVX2_CFLAGS" = "x" ; then
+ AVX2_CFLAGS="-mavx2 -Winline"
+fi
+
+have_avx2_intrinsics=no
+AC_MSG_CHECKING(whether to use AVX2 intrinsics)
+xserver_save_CFLAGS=$CFLAGS
+CFLAGS="$AVX2_CFLAGS $CFLAGS"
+
+AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
+#include <immintrin.h>
+int param;
+int main () {
+ __m256i a = _mm256_set1_epi32 (param), b = _mm256_set1_epi32 (param + 1), c;
+ c = _mm256_maddubs_epi16 (a, b);
+ return _mm256_cvtsi256_si32(c);
+}]])], have_avx2_intrinsics=yes)
+CFLAGS=$xserver_save_CFLAGS
+
+AC_ARG_ENABLE(avx2,
+ [AC_HELP_STRING([--disable-avx2],
+ [disable AVX2 fast paths])],
+ [enable_avx2=$enableval], [enable_avx2=auto])
+
+if test $enable_avx2 = no ; then
+ have_avx2_intrinsics=disabled
+fi
+
+if test $have_avx2_intrinsics = yes ; then
+ AC_DEFINE(USE_AVX2, 1, [use AVX2 compiler intrinsics])
+fi
+
+AC_MSG_RESULT($have_avx2_intrinsics)
+if test $enable_avx2 = yes && test $have_avx2_intrinsics = no ; then
+ AC_MSG_ERROR([AVX2 intrinsics not detected])
+fi
+
+AM_CONDITIONAL(USE_AVX2, test $have_avx2_intrinsics = yes)
+
+dnl ===========================================================================
dnl Other special flags needed when building code using MMX or SSE instructions
case $host_os in
solaris*)
@@ -538,6 +580,8 @@ AC_SUBST(MMX_LDFLAGS)
AC_SUBST(SSE2_CFLAGS)
AC_SUBST(SSE2_LDFLAGS)
AC_SUBST(SSSE3_CFLAGS)
+AC_SUBST(AVX2_CFLAGS)
+AC_SUBST(AVX2_LDFLAGS)

dnl ===========================================================================
dnl Check for VMX/Altivec
diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index 581b6f6..7204621 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -64,6 +64,18 @@ libpixman_1_la_LIBADD += libpixman-ssse3.la
ASM_CFLAGS_ssse3=$(SSSE3_CFLAGS)
endif

+# avx2 code
+if USE_AVX2
+noinst_LTLIBRARIES += libpixman-avx2.la
+libpixman_avx2_la_SOURCES = \
+ pixman-avx2.c
+libpixman_avx2_la_CFLAGS = $(AVX2_CFLAGS)
+libpixman_1_la_LDFLAGS += $(AVX2_LDFLAGS)
+libpixman_1_la_LIBADD += libpixman-avx2.la
+
+ASM_CFLAGS_avx2=$(AVX2_CFLAGS)
+endif
+
# arm simd code
if USE_ARM_SIMD
noinst_LTLIBRARIES += libpixman-arm-simd.la
diff --git a/pixman/pixman-avx2.c b/pixman/pixman-avx2.c
new file mode 100644
index 0000000..d860d67
--- /dev/null
+++ b/pixman/pixman-avx2.c
@@ -0,0 +1,32 @@
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <immintrin.h> /* for AVX2 intrinsics */
+#include "pixman-private.h"
+#include "pixman-combine32.h"
+#include "pixman-inlines.h"
+
+static const pixman_fast_path_t avx2_fast_paths[] =
+{
+ { PIXMAN_OP_NONE },
+};
+
+static const pixman_iter_info_t avx2_iters[] =
+{
+ { PIXMAN_null },
+};
+
+#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
+__attribute__((__force_align_arg_pointer__))
+#endif
+pixman_implementation_t *
+_pixman_implementation_create_avx2 (pixman_implementation_t *fallback)
+{
+ pixman_implementation_t *imp = _pixman_implementation_create (fallback, avx2_fast_paths);
+
+ /* Set up function pointers */
+ imp->iter_info = avx2_iters;
+
+ return imp;
+}
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 73a5414..b6b15df 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -597,6 +597,11 @@ pixman_implementation_t *
_pixman_implementation_create_ssse3 (pixman_implementation_t *fallback);
#endif

+#ifdef USE_AVX2
+pixman_implementation_t *
+_pixman_implementation_create_avx2 (pixman_implementation_t *fallback);
+#endif
+
#ifdef USE_ARM_SIMD
pixman_implementation_t *
_pixman_implementation_create_arm_simd (pixman_implementation_t *fallback);
diff --git a/pixman/pixman-x86.c b/pixman/pixman-x86.c
index 05297c4..687c83b 100644
--- a/pixman/pixman-x86.c
+++ b/pixman/pixman-x86.c
@@ -40,7 +40,8 @@ typedef enum
X86_SSE = (1 << 2) | X86_MMX_EXTENSIONS,
X86_SSE2 = (1 << 3),
X86_CMOV = (1 << 4),
- X86_SSSE3 = (1 << 5)
+ X86_SSSE3 = (1 << 5),
+ X86_AVX2 = (1 << 6),
} cpu_features_t;

#ifdef HAVE_GETISAX
@@ -119,7 +120,7 @@ pixman_cpuid (uint32_t feature,
__asm__ volatile (
"cpuid" "\n\t"
: "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d)
- : "a" (feature));
+ : "a" (feature), "c" (0));
#else
/* On x86-32 we need to be careful about the handling of %ebx
* and %esp. We can't declare either one as clobbered
@@ -172,6 +173,10 @@ detect_cpu_features (void)
features |= X86_SSE2;
if (c & (1 << 9))
features |= X86_SSSE3;
+
+ pixman_cpuid (0x07, &a, &b, &c, &d);
+ if (b & (1 << 5))
+ features |= X86_AVX2;

/* Check for AMD specific features */
if ((features & X86_MMX) && !(features & X86_SSE))
@@ -228,6 +233,7 @@ _pixman_x86_get_implementations (pixman_implementation_t *imp)
#define MMX_BITS (X86_MMX | X86_MMX_EXTENSIONS)
#define SSE2_BITS (X86_MMX | X86_MMX_EXTENSIONS | X86_SSE | X86_SSE2)
#define SSSE3_BITS (X86_SSE | X86_SSE2 | X86_SSSE3)
+#define AVX2_BITS (X86_AVX2)

#ifdef USE_X86_MMX
if (!_pixman_disabled ("mmx") && have_feature (MMX_BITS))
@@ -244,5 +250,10 @@ _pixman_x86_get_implementations (pixman_implementation_t *imp)
imp = _pixman_implementation_create_ssse3 (imp);
#endif

+#if (defined USE_AVX2 && defined USE_SSE2)
+ if (!_pixman_disabled ("avx2") && have_feature (AVX2_BITS))
+ imp = _pixman_implementation_create_avx2(imp);
+#endif
+
return imp;
}
--
2.7.4
Chris Wilson
2018-08-23 08:03:18 UTC
Permalink
Quoting raghuveer devulapalli (2018-08-22 18:02:02)
Post by raghuveer devulapalli
#ifdef HAVE_GETISAX
@@ -119,7 +120,7 @@ pixman_cpuid (uint32_t feature,
__asm__ volatile (
"cpuid" "\n\t"
: "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d)
- : "a" (feature));
+ : "a" (feature), "c" (0));
#else
/* On x86-32 we need to be careful about the handling of %ebx
* and %esp. We can't declare either one as clobbered
@@ -172,6 +173,10 @@ detect_cpu_features (void)
features |= X86_SSE2;
if (c & (1 << 9))
features |= X86_SSSE3;
+
+ pixman_cpuid (0x07, &a, &b, &c, &d);
+ if (b & (1 << 5))
+ features |= X86_AVX2;
It's not enough to check for the cpu feature, you need to also check for
os support. Something like:

#define xgetbv(index,eax,edx) \
__asm__ ("xgetbv" : "=a"(eax), "=d"(edx) : "c" (index))

#define has_YMM 0x1

unsigned cpu_detect(void)
{
unsigned max = __get_cpuid_max(BASIC_CPUID, NULL);
unsigned eax, ebx, ecx, edx;
unsigned features = 0;
unsigned extra = 0;

if (max >= 1) {
__cpuid(1, eax, ebx, ecx, edx);

/* snip all the usual sse features */

if (ecx & bit_OSXSAVE) {
unsigned int bv_eax, bv_ecx;
xgetbv(0, bv_eax, bv_ecx);
if ((bv_eax & 6) == 6)
extra |= has_YMM;
}

if ((extra & has_YMM) && (ecx & bit_AVX))
features |= AVX;
}

if (max >= 7) {
__cpuid_count(7, 0, eax, ebx, ecx, edx);

if ((extra & has_YMM) && (ebx & bit_AVX2))
features |= AVX2;
}

return features;
}
Matt Turner
2018-08-29 18:43:23 UTC
Permalink
Thank you for the patches! Some comments inline.

On Wed, Aug 22, 2018 at 10:03 AM raghuveer devulapalli
Post by raghuveer devulapalli
---
configure.ac | 44 ++++++++++++++++++++++++++++++++++++++++++++
pixman/Makefile.am | 12 ++++++++++++
pixman/pixman-avx2.c | 32 ++++++++++++++++++++++++++++++++
pixman/pixman-private.h | 5 +++++
pixman/pixman-x86.c | 15 +++++++++++++--
5 files changed, 106 insertions(+), 2 deletions(-)
create mode 100644 pixman/pixman-avx2.c
diff --git a/configure.ac b/configure.ac
index e833e45..27f4305 100644
--- a/configure.ac
+++ b/configure.ac
@@ -503,6 +503,48 @@ fi
AM_CONDITIONAL(USE_SSSE3, test $have_ssse3_intrinsics = yes)
dnl ===========================================================================
+dnl Check for AVX2
Trailing whitespace
Post by raghuveer devulapalli
+
+if test "x$AVX2_CFLAGS" = "x" ; then
+ AVX2_CFLAGS="-mavx2 -Winline"
+fi
+
+have_avx2_intrinsics=no
+AC_MSG_CHECKING(whether to use AVX2 intrinsics)
+xserver_save_CFLAGS=$CFLAGS
+CFLAGS="$AVX2_CFLAGS $CFLAGS"
+
+AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
+#include <immintrin.h>
+int param;
+int main () {
+ __m256i a = _mm256_set1_epi32 (param), b = _mm256_set1_epi32 (param + 1), c;
+ c = _mm256_maddubs_epi16 (a, b);
+ return _mm256_cvtsi256_si32(c);
+}]])], have_avx2_intrinsics=yes)
+CFLAGS=$xserver_save_CFLAGS
+
+AC_ARG_ENABLE(avx2,
+ [AC_HELP_STRING([--disable-avx2],
+ [disable AVX2 fast paths])],
+ [enable_avx2=$enableval], [enable_avx2=auto])
+
+if test $enable_avx2 = no ; then
+ have_avx2_intrinsics=disabled
+fi
+
+if test $have_avx2_intrinsics = yes ; then
+ AC_DEFINE(USE_AVX2, 1, [use AVX2 compiler intrinsics])
+fi
+
+AC_MSG_RESULT($have_avx2_intrinsics)
+if test $enable_avx2 = yes && test $have_avx2_intrinsics = no ; then
+ AC_MSG_ERROR([AVX2 intrinsics not detected])
+fi
+
+AM_CONDITIONAL(USE_AVX2, test $have_avx2_intrinsics = yes)
+
+dnl ===========================================================================
dnl Other special flags needed when building code using MMX or SSE instructions
case $host_os in
solaris*)
@@ -538,6 +580,8 @@ AC_SUBST(MMX_LDFLAGS)
AC_SUBST(SSE2_CFLAGS)
AC_SUBST(SSE2_LDFLAGS)
AC_SUBST(SSSE3_CFLAGS)
+AC_SUBST(AVX2_CFLAGS)
+AC_SUBST(AVX2_LDFLAGS)
dnl ===========================================================================
dnl Check for VMX/Altivec
diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index 581b6f6..7204621 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -64,6 +64,18 @@ libpixman_1_la_LIBADD += libpixman-ssse3.la
ASM_CFLAGS_ssse3=$(SSSE3_CFLAGS)
endif
+# avx2 code
+if USE_AVX2
+noinst_LTLIBRARIES += libpixman-avx2.la
+libpixman_avx2_la_SOURCES = \
+ pixman-avx2.c
+libpixman_avx2_la_CFLAGS = $(AVX2_CFLAGS)
+libpixman_1_la_LDFLAGS += $(AVX2_LDFLAGS)
+libpixman_1_la_LIBADD += libpixman-avx2.la
+
+ASM_CFLAGS_avx2=$(AVX2_CFLAGS)
+endif
+
# arm simd code
if USE_ARM_SIMD
noinst_LTLIBRARIES += libpixman-arm-simd.la
diff --git a/pixman/pixman-avx2.c b/pixman/pixman-avx2.c
new file mode 100644
index 0000000..d860d67
--- /dev/null
+++ b/pixman/pixman-avx2.c
@@ -0,0 +1,32 @@
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <immintrin.h> /* for AVX2 intrinsics */
+#include "pixman-private.h"
+#include "pixman-combine32.h"
+#include "pixman-inlines.h"
+
+static const pixman_fast_path_t avx2_fast_paths[] =
+{
+ { PIXMAN_OP_NONE },
+};
+
+static const pixman_iter_info_t avx2_iters[] =
Trailing whitespace
Post by raghuveer devulapalli
+{
+ { PIXMAN_null },
+};
+
+#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
+__attribute__((__force_align_arg_pointer__))
+#endif
+pixman_implementation_t *
+_pixman_implementation_create_avx2 (pixman_implementation_t *fallback)
+{
+ pixman_implementation_t *imp = _pixman_implementation_create (fallback, avx2_fast_paths);
+
+ /* Set up function pointers */
+ imp->iter_info = avx2_iters;
+
+ return imp;
+}
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 73a5414..b6b15df 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -597,6 +597,11 @@ pixman_implementation_t *
_pixman_implementation_create_ssse3 (pixman_implementation_t *fallback);
#endif
+#ifdef USE_AVX2
+pixman_implementation_t *
+_pixman_implementation_create_avx2 (pixman_implementation_t *fallback);
+#endif
+
#ifdef USE_ARM_SIMD
pixman_implementation_t *
_pixman_implementation_create_arm_simd (pixman_implementation_t *fallback);
diff --git a/pixman/pixman-x86.c b/pixman/pixman-x86.c
index 05297c4..687c83b 100644
--- a/pixman/pixman-x86.c
+++ b/pixman/pixman-x86.c
At the top of this file there is a preprocessor check:

#if defined(USE_X86_MMX) || defined (USE_SSE2) || defined (USE_SSSE3)

I think || defined (USE_AVX2) should be added here.
Post by raghuveer devulapalli
@@ -40,7 +40,8 @@ typedef enum
X86_SSE = (1 << 2) | X86_MMX_EXTENSIONS,
X86_SSE2 = (1 << 3),
X86_CMOV = (1 << 4),
- X86_SSSE3 = (1 << 5)
+ X86_SSSE3 = (1 << 5),
+ X86_AVX2 = (1 << 6),
I'm not 100% we can use trailing commas in pixman due to MSVC.
Probably safer just to leave it off.
Post by raghuveer devulapalli
} cpu_features_t;
#ifdef HAVE_GETISAX
@@ -119,7 +120,7 @@ pixman_cpuid (uint32_t feature,
__asm__ volatile (
"cpuid" "\n\t"
: "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d)
- : "a" (feature));
+ : "a" (feature), "c" (0));
Just to make sure I'm understanding: cpuid returns AVX2 presence in
bit 5 of ebx when it is executed with eax=7 and ecx=0, so we need to
ensure ecx is set to 0?

I think that's fine. It seems like ecx isn't required to be any
particular value for the other cases. Perhaps a comment would help
future readers understand.
Post by raghuveer devulapalli
#else
/* On x86-32 we need to be careful about the handling of %ebx
* and %esp. We can't declare either one as clobbered
@@ -172,6 +173,10 @@ detect_cpu_features (void)
features |= X86_SSE2;
if (c & (1 << 9))
features |= X86_SSSE3;
+
Spurious whitespace
Post by raghuveer devulapalli
+ pixman_cpuid (0x07, &a, &b, &c, &d);
+ if (b & (1 << 5))
+ features |= X86_AVX2;
/* Check for AMD specific features */
if ((features & X86_MMX) && !(features & X86_SSE))
@@ -228,6 +233,7 @@ _pixman_x86_get_implementations (pixman_implementation_t *imp)
#define MMX_BITS (X86_MMX | X86_MMX_EXTENSIONS)
#define SSE2_BITS (X86_MMX | X86_MMX_EXTENSIONS | X86_SSE | X86_SSE2)
#define SSSE3_BITS (X86_SSE | X86_SSE2 | X86_SSSE3)
+#define AVX2_BITS (X86_AVX2)
#ifdef USE_X86_MMX
if (!_pixman_disabled ("mmx") && have_feature (MMX_BITS))
@@ -244,5 +250,10 @@ _pixman_x86_get_implementations (pixman_implementation_t *imp)
imp = _pixman_implementation_create_ssse3 (imp);
#endif
+#if (defined USE_AVX2 && defined USE_SSE2)
+ if (!_pixman_disabled ("avx2") && have_feature (AVX2_BITS))
+ imp = _pixman_implementation_create_avx2(imp);
+#endif
+
return imp;
}
--
2.7.4
Devulapalli, Raghuveer
2018-09-06 19:10:21 UTC
Permalink
Thanks Matt and Chris for your valuable feedback. I am incorporating your feedback and preparing updated patches. Unfortunately I am out of office for the next 2-3 weeks, but I will post them shortly after I get back.

Thanks,
Raghuveer

-----Original Message-----
From: Matt Turner [mailto:***@gmail.com]
Sent: Wednesday, August 29, 2018 11:43 AM
To: Devulapalli, Raghuveer <***@intel.com>
Cc: ***@lists.freedesktop.org
Subject: Re: [Pixman] [PATCH] Adding infrastructure to permit future AVX2 implementations

Thank you for the patches! Some comments inline.
Post by raghuveer devulapalli
---
configure.ac | 44 ++++++++++++++++++++++++++++++++++++++++++++
pixman/Makefile.am | 12 ++++++++++++
pixman/pixman-avx2.c | 32 ++++++++++++++++++++++++++++++++
pixman/pixman-private.h | 5 +++++
pixman/pixman-x86.c | 15 +++++++++++++--
5 files changed, 106 insertions(+), 2 deletions(-) create mode
100644 pixman/pixman-avx2.c
diff --git a/configure.ac b/configure.ac index e833e45..27f4305 100644
--- a/configure.ac
+++ b/configure.ac
@@ -503,6 +503,48 @@ fi
AM_CONDITIONAL(USE_SSSE3, test $have_ssse3_intrinsics = yes)
dnl
======================================================================
=====
+dnl Check for AVX2
Trailing whitespace
Post by raghuveer devulapalli
+
+if test "x$AVX2_CFLAGS" = "x" ; then
+ AVX2_CFLAGS="-mavx2 -Winline"
+fi
+
+have_avx2_intrinsics=no
+AC_MSG_CHECKING(whether to use AVX2 intrinsics)
+xserver_save_CFLAGS=$CFLAGS CFLAGS="$AVX2_CFLAGS $CFLAGS"
+
+AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
+#include <immintrin.h>
+int param;
+int main () {
+ __m256i a = _mm256_set1_epi32 (param), b = _mm256_set1_epi32 (param + 1), c;
+ c = _mm256_maddubs_epi16 (a, b);
+ return _mm256_cvtsi256_si32(c);
+}]])], have_avx2_intrinsics=yes)
+CFLAGS=$xserver_save_CFLAGS
+
+AC_ARG_ENABLE(avx2,
+ [AC_HELP_STRING([--disable-avx2],
+ [disable AVX2 fast paths])],
+ [enable_avx2=$enableval], [enable_avx2=auto])
+
+if test $enable_avx2 = no ; then
+ have_avx2_intrinsics=disabled
+fi
+
+if test $have_avx2_intrinsics = yes ; then
+ AC_DEFINE(USE_AVX2, 1, [use AVX2 compiler intrinsics]) fi
+
+AC_MSG_RESULT($have_avx2_intrinsics)
+if test $enable_avx2 = yes && test $have_avx2_intrinsics = no ; then
+ AC_MSG_ERROR([AVX2 intrinsics not detected]) fi
+
+AM_CONDITIONAL(USE_AVX2, test $have_avx2_intrinsics = yes)
+
+dnl
+=====================================================================
+======
dnl Other special flags needed when building code using MMX or SSE
instructions case $host_os in
solaris*)
@@ -538,6 +580,8 @@ AC_SUBST(MMX_LDFLAGS)
AC_SUBST(SSE2_CFLAGS)
AC_SUBST(SSE2_LDFLAGS)
AC_SUBST(SSSE3_CFLAGS)
+AC_SUBST(AVX2_CFLAGS)
+AC_SUBST(AVX2_LDFLAGS)
dnl
======================================================================
=====
dnl Check for VMX/Altivec
diff --git a/pixman/Makefile.am b/pixman/Makefile.am index
581b6f6..7204621 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -64,6 +64,18 @@ libpixman_1_la_LIBADD += libpixman-ssse3.la
ASM_CFLAGS_ssse3=$(SSSE3_CFLAGS)
endif
+# avx2 code
+if USE_AVX2
+noinst_LTLIBRARIES += libpixman-avx2.la libpixman_avx2_la_SOURCES = \
+ pixman-avx2.c
+libpixman_avx2_la_CFLAGS = $(AVX2_CFLAGS) libpixman_1_la_LDFLAGS +=
+$(AVX2_LDFLAGS) libpixman_1_la_LIBADD += libpixman-avx2.la
+
+ASM_CFLAGS_avx2=$(AVX2_CFLAGS)
+endif
+
# arm simd code
if USE_ARM_SIMD
noinst_LTLIBRARIES += libpixman-arm-simd.la diff --git
a/pixman/pixman-avx2.c b/pixman/pixman-avx2.c new file mode 100644
index 0000000..d860d67
--- /dev/null
+++ b/pixman/pixman-avx2.c
@@ -0,0 +1,32 @@
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <immintrin.h> /* for AVX2 intrinsics */ #include
+"pixman-private.h"
+#include "pixman-combine32.h"
+#include "pixman-inlines.h"
+
+static const pixman_fast_path_t avx2_fast_paths[] = {
+ { PIXMAN_OP_NONE },
+};
+
+static const pixman_iter_info_t avx2_iters[] =
Trailing whitespace
Post by raghuveer devulapalli
+{
+ { PIXMAN_null },
+};
+
+#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
+__attribute__((__force_align_arg_pointer__))
+#endif
+pixman_implementation_t *
+_pixman_implementation_create_avx2 (pixman_implementation_t
+*fallback) {
+ pixman_implementation_t *imp = _pixman_implementation_create
+(fallback, avx2_fast_paths);
+
+ /* Set up function pointers */
+ imp->iter_info = avx2_iters;
+
+ return imp;
+}
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h index
73a5414..b6b15df 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -597,6 +597,11 @@ pixman_implementation_t *
_pixman_implementation_create_ssse3 (pixman_implementation_t
*fallback); #endif
+#ifdef USE_AVX2
+pixman_implementation_t *
+_pixman_implementation_create_avx2 (pixman_implementation_t
+*fallback); #endif
+
#ifdef USE_ARM_SIMD
pixman_implementation_t *
_pixman_implementation_create_arm_simd (pixman_implementation_t
*fallback); diff --git a/pixman/pixman-x86.c b/pixman/pixman-x86.c
index 05297c4..687c83b 100644
--- a/pixman/pixman-x86.c
+++ b/pixman/pixman-x86.c
At the top of this file there is a preprocessor check:

#if defined(USE_X86_MMX) || defined (USE_SSE2) || defined (USE_SSSE3)

I think || defined (USE_AVX2) should be added here.
Post by raghuveer devulapalli
@@ -40,7 +40,8 @@ typedef enum
X86_SSE = (1 << 2) | X86_MMX_EXTENSIONS,
X86_SSE2 = (1 << 3),
X86_CMOV = (1 << 4),
- X86_SSSE3 = (1 << 5)
+ X86_SSSE3 = (1 << 5),
+ X86_AVX2 = (1 << 6),
I'm not 100% we can use trailing commas in pixman due to MSVC.
Probably safer just to leave it off.
Post by raghuveer devulapalli
} cpu_features_t;
#ifdef HAVE_GETISAX
@@ -119,7 +120,7 @@ pixman_cpuid (uint32_t feature,
__asm__ volatile (
"cpuid" "\n\t"
: "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d)
- : "a" (feature));
+ : "a" (feature), "c" (0));
Just to make sure I'm understanding: cpuid returns AVX2 presence in bit 5 of ebx when it is executed with eax=7 and ecx=0, so we need to ensure ecx is set to 0?

I think that's fine. It seems like ecx isn't required to be any particular value for the other cases. Perhaps a comment would help future readers understand.
Post by raghuveer devulapalli
#else
/* On x86-32 we need to be careful about the handling of %ebx
features |= X86_SSE2;
if (c & (1 << 9))
features |= X86_SSSE3;
+
Spurious whitespace
Post by raghuveer devulapalli
+ pixman_cpuid (0x07, &a, &b, &c, &d);
+ if (b & (1 << 5))
+ features |= X86_AVX2;
/* Check for AMD specific features */
*imp) #define MMX_BITS (X86_MMX | X86_MMX_EXTENSIONS) #define
SSE2_BITS (X86_MMX | X86_MMX_EXTENSIONS | X86_SSE | X86_SSE2) #define
SSSE3_BITS (X86_SSE | X86_SSE2 | X86_SSSE3)
+#define AVX2_BITS (X86_AVX2)
#ifdef USE_X86_MMX
imp = _pixman_implementation_create_ssse3 (imp); #endif
+#if (defined USE_AVX2 && defined USE_SSE2)
+ if (!_pixman_disabled ("avx2") && have_feature (AVX2_BITS))
+ imp = _pixman_implementation_create_avx2(imp);
+#endif
+
return imp;
}
--
2.7.4
Loading...