Updated FFMPEG to version 1.1.2, using this project: http://sourceforge.net/projects/ffmpeg4android/
This commit is contained in:
5
project/jni/ffmpeg/libavutil/x86/Makefile
Normal file
5
project/jni/ffmpeg/libavutil/x86/Makefile
Normal file
@@ -0,0 +1,5 @@
|
||||
# Objects for the C sources of the x86-specific libavutil code.
OBJS += x86/cpu.o \
x86/float_dsp_init.o \

# Objects for the .asm sources in this directory (cpuid.asm, float_dsp.asm).
# NOTE(review): presumably only linked when YASM is enabled -- confirm
# against the top-level build rules.
YASM-OBJS += x86/cpuid.o \
x86/float_dsp.o \
|
||||
110
project/jni/ffmpeg/libavutil/x86/asm.h
Normal file
110
project/jni/ffmpeg/libavutil/x86/asm.h
Normal file
@@ -0,0 +1,110 @@
|
||||
/*
|
||||
* copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_X86_ASM_H
|
||||
#define AVUTIL_X86_ASM_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "config.h"
|
||||
|
||||
/*
 * Register names and native register width for inline assembly.
 *
 *   REG_x    register name as a string literal, ready to be pasted into an
 *            asm template
 *   REGx     the same register as a bare token
 *   OPSIZE   operand-size suffix for native-width instructions
 *   PTR_SIZE pointer size in bytes, as a string literal
 *   x86_reg  signed integer type as wide as a general-purpose register;
 *            plain int when neither x86 variant is being built
 */
#if ARCH_X86_64

typedef int64_t x86_reg;

#    define OPSIZE   "q"
#    define PTR_SIZE "8"

/* String forms. */
#    define REG_a  "rax"
#    define REG_b  "rbx"
#    define REG_c  "rcx"
#    define REG_d  "rdx"
#    define REG_D  "rdi"
#    define REG_S  "rsi"
#    define REG_SP "rsp"
#    define REG_BP "rbp"

/* Token forms. */
#    define REGa  rax
#    define REGb  rbx
#    define REGc  rcx
#    define REGd  rdx
#    define REGSP rsp
#    define REGBP rbp

#elif ARCH_X86_32

typedef int32_t x86_reg;

#    define OPSIZE   "l"
#    define PTR_SIZE "4"

/* String forms. */
#    define REG_a  "eax"
#    define REG_b  "ebx"
#    define REG_c  "ecx"
#    define REG_d  "edx"
#    define REG_D  "edi"
#    define REG_S  "esi"
#    define REG_SP "esp"
#    define REG_BP "ebp"

/* Token forms. */
#    define REGa  eax
#    define REGb  ebx
#    define REGc  ecx
#    define REGd  edx
#    define REGSP esp
#    define REGBP ebp

#else

typedef int x86_reg;

#endif
|
||||
|
||||
/*
 * HAVE_7REGS is true on x86-64, or when both HAVE_EBX_AVAILABLE and
 * HAVE_EBP_AVAILABLE are set; HAVE_6REGS only needs one of the two.
 */
#define HAVE_7REGS (ARCH_X86_64 || (HAVE_EBX_AVAILABLE && HAVE_EBP_AVAILABLE))
#define HAVE_6REGS (ARCH_X86_64 || (HAVE_EBX_AVAILABLE || HAVE_EBP_AVAILABLE))

/* BROKEN_RELOCATIONS is defined only for position-independent x86-64 builds. */
#if ARCH_X86_64 && defined(PIC)
# define BROKEN_RELOCATIONS 1
#endif
|
||||
|
||||
/*
 * If gcc is not set to support sse (-msse) it will not accept xmm registers
 * in the clobber list for inline asm. XMM_CLOBBERS takes a list of xmm
 * registers to be marked as clobbered and evaluates to nothing if they are
 * not supported, or to the list itself if they are supported. Since a clobber
 * list may not be empty, XMM_CLOBBERS_ONLY should be used if the xmm
 * registers are the only entries in the clobber list; it also supplies the
 * leading ':' so that the whole clobber section disappears when unsupported.
 * For example a list with "eax" and "xmm0" as clobbers should become:
 * : XMM_CLOBBERS("xmm0",) "eax"
 * and a list with only "xmm0" should become:
 * XMM_CLOBBERS_ONLY("xmm0")
 */
#if HAVE_XMM_CLOBBERS
/* xmm clobbers are accepted: pass the list through unchanged. */
# define XMM_CLOBBERS(...) __VA_ARGS__
# define XMM_CLOBBERS_ONLY(...) : __VA_ARGS__
#else
/* xmm clobbers are rejected: both macros expand to nothing. */
# define XMM_CLOBBERS(...)
# define XMM_CLOBBERS_ONLY(...)
#endif
|
||||
|
||||
/* Use to export labels from asm: EXTERN_PREFIX followed by the stringized
 * name. */
#define LABEL_MANGLE(a) EXTERN_PREFIX #a

// Use rip-relative addressing if compiling PIC code on x86-64.
// LOCAL_MANGLE stringizes the name and, in that case only, appends "(%%rip)".
#if ARCH_X86_64 && defined(PIC)
# define LOCAL_MANGLE(a) #a "(%%rip)"
#else
# define LOCAL_MANGLE(a) #a
#endif

/* Reference to a symbol from an asm template: EXTERN_PREFIX followed by
 * LOCAL_MANGLE(a). */
#define MANGLE(a) EXTERN_PREFIX LOCAL_MANGLE(a)
|
||||
|
||||
#endif /* AVUTIL_X86_ASM_H */
|
||||
61
project/jni/ffmpeg/libavutil/x86/bswap.h
Normal file
61
project/jni/ffmpeg/libavutil/x86/bswap.h
Normal file
@@ -0,0 +1,61 @@
|
||||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file
|
||||
* byte swapping routines
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_X86_BSWAP_H
|
||||
#define AVUTIL_X86_BSWAP_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "config.h"
|
||||
#include "libavutil/attributes.h"
|
||||
|
||||
#if HAVE_INLINE_ASM
|
||||
|
||||
#if !AV_GCC_VERSION_AT_LEAST(4,1)
#define av_bswap16 av_bswap16
/**
 * Swap the two bytes of the low 16 bits of x with a single rorw.
 *
 * Only the 16-bit sub-register is rotated; the upper bits of the operand
 * are returned as they were passed in.
 */
static av_always_inline av_const unsigned av_bswap16(unsigned x)
{
    unsigned rotated = x;

    __asm__("rorw $8, %w0" : "+r"(rotated));

    return rotated;
}
#endif /* !AV_GCC_VERSION_AT_LEAST(4,1) */
|
||||
|
||||
#if !AV_GCC_VERSION_AT_LEAST(4,5)
#define av_bswap32 av_bswap32
/** Reverse the byte order of a 32-bit value using the bswap instruction. */
static av_always_inline av_const uint32_t av_bswap32(uint32_t x)
{
    uint32_t swapped;

    /* The input is tied to the output register ("0"). */
    __asm__("bswap %0" : "=r"(swapped) : "0"(x));

    return swapped;
}

#if ARCH_X86_64
#define av_bswap64 av_bswap64
/** Reverse the byte order of a 64-bit value using the bswap instruction. */
static inline uint64_t av_const av_bswap64(uint64_t x)
{
    uint64_t swapped = x;

    __asm__("bswap %0" : "+r"(swapped));

    return swapped;
}
#endif
#endif /* !AV_GCC_VERSION_AT_LEAST(4,5) */
|
||||
|
||||
#endif /* HAVE_INLINE_ASM */
|
||||
#endif /* AVUTIL_X86_BSWAP_H */
|
||||
201
project/jni/ffmpeg/libavutil/x86/cpu.c
Normal file
201
project/jni/ffmpeg/libavutil/x86/cpu.c
Normal file
@@ -0,0 +1,201 @@
|
||||
/*
|
||||
* CPU detection code, extracted from mmx.h
|
||||
* (c)1997-99 by H. Dietz and R. Fisher
|
||||
* Converted to C and improved by Fabrice Bellard.
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "libavutil/x86/asm.h"
|
||||
#include "libavutil/x86/cpu.h"
|
||||
#include "libavutil/cpu.h"
|
||||
|
||||
#if HAVE_YASM
|
||||
|
||||
#define cpuid(index, eax, ebx, ecx, edx) \
|
||||
ff_cpu_cpuid(index, &eax, &ebx, &ecx, &edx)
|
||||
|
||||
#define xgetbv(index, eax, edx) \
|
||||
ff_cpu_xgetbv(index, &eax, &edx)
|
||||
|
||||
#elif HAVE_INLINE_ASM
|
||||
|
||||
/* ebx saving is necessary for PIC. gcc seems unable to see it alone */
|
||||
#define cpuid(index, eax, ebx, ecx, edx) \
|
||||
__asm__ volatile ( \
|
||||
"mov %%"REG_b", %%"REG_S" \n\t" \
|
||||
"cpuid \n\t" \
|
||||
"xchg %%"REG_b", %%"REG_S \
|
||||
: "=a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx) \
|
||||
: "0" (index))
|
||||
|
||||
#define xgetbv(index, eax, edx) \
|
||||
__asm__ (".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c" (index))
|
||||
|
||||
#define get_eflags(x) \
|
||||
__asm__ volatile ("pushfl \n" \
|
||||
"pop %0 \n" \
|
||||
: "=r"(x))
|
||||
|
||||
#define set_eflags(x) \
|
||||
__asm__ volatile ("push %0 \n" \
|
||||
"popfl \n" \
|
||||
:: "r"(x))
|
||||
|
||||
#endif /* HAVE_INLINE_ASM */
|
||||
|
||||
#if ARCH_X86_64

/* On x86-64 the probe is skipped and CPUID is treated as available. */
#define cpuid_test() 1

#elif HAVE_YASM

/* Use the external assembly probe. */
#define cpuid_test ff_cpu_cpuid_test

#elif HAVE_INLINE_ASM

/**
 * Probe for CPUID by trying to flip the ID bit (0x200000) of EFLAGS.
 *
 * @return non-zero if the value read back differs from the original flags,
 *         i.e. the bit could be toggled and CPUID is supported; 0 otherwise
 */
static int cpuid_test(void)
{
    x86_reg flags_before, flags_after;

    get_eflags(flags_before);
    set_eflags(flags_before ^ 0x200000);
    get_eflags(flags_after);

    return flags_before != flags_after;
}
#endif
|
||||
|
||||
/**
 * Detect the multimedia instruction sets supported by the running CPU.
 *
 * @return bitmask of AV_CPU_FLAG_* values; 0 when no cpuid() implementation
 *         was compiled in or when cpuid_test() reports CPUID as unavailable
 */
int ff_get_cpu_flags_x86(void)
{
    int flags = 0;

#ifdef cpuid

    union { int i[3]; char c[12]; } vendor;
    int reg_a, reg_b, reg_c, reg_d;
    int std_level, ext_level;
    int std_caps = 0, ext_caps = 0;
    int family = 0, model = 0;

    if (!cpuid_test())
        return 0; /* CPUID not supported */

    /* Leaf 0: highest standard leaf, plus the 12-byte vendor string which
     * is assembled from ebx, edx, ecx in that order. */
    cpuid(0, std_level, vendor.i[0], vendor.i[2], vendor.i[1]);

    if (std_level >= 1) {
        /* Leaf 1: signature in eax, feature bits in ecx and edx. */
        cpuid(1, reg_a, reg_b, reg_c, std_caps);
        family = ((reg_a >> 8) & 0xf) + ((reg_a >> 20) & 0xff);
        model  = ((reg_a >> 4) & 0xf) + ((reg_a >> 12) & 0xf0);

        if (std_caps & (1 << 15))
            flags |= AV_CPU_FLAG_CMOV;
        if (std_caps & (1 << 23))
            flags |= AV_CPU_FLAG_MMX;
        /* The SSE feature bit also enables MMXEXT. */
        if (std_caps & (1 << 25))
            flags |= AV_CPU_FLAG_MMXEXT;
#if HAVE_SSE
        if (std_caps & (1 << 25))
            flags |= AV_CPU_FLAG_SSE;
        if (std_caps & (1 << 26))
            flags |= AV_CPU_FLAG_SSE2;
        if (reg_c & (1 << 0))
            flags |= AV_CPU_FLAG_SSE3;
        if (reg_c & (1 << 9))
            flags |= AV_CPU_FLAG_SSSE3;
        if (reg_c & (1 << 19))
            flags |= AV_CPU_FLAG_SSE4;
        if (reg_c & (1 << 20))
            flags |= AV_CPU_FLAG_SSE42;
#if HAVE_AVX
        /* Both the OSXSAVE (bit 27) and AVX (bit 28) bits must be set. */
        if ((reg_c & ((1 << 27) | (1 << 28))) == ((1 << 27) | (1 << 28))) {
            /* The OS must support it too. */
            xgetbv(0, reg_a, reg_d);
            if ((reg_a & 0x6) == 0x6)
                flags |= AV_CPU_FLAG_AVX;
        }
#endif /* HAVE_AVX */
#endif /* HAVE_SSE */
    }

    /* Extended leaves. */
    cpuid(0x80000000, ext_level, reg_b, reg_c, reg_d);

    if (ext_level >= 0x80000001) {
        cpuid(0x80000001, reg_a, reg_b, reg_c, ext_caps);

        if (ext_caps & (1U << 31))
            flags |= AV_CPU_FLAG_3DNOW;
        if (ext_caps & (1 << 30))
            flags |= AV_CPU_FLAG_3DNOWEXT;
        if (ext_caps & (1 << 23))
            flags |= AV_CPU_FLAG_MMX;
        if (ext_caps & (1 << 22))
            flags |= AV_CPU_FLAG_MMXEXT;

        /* AMD processors that have SSE2 but not SSE4a (Athlon64, some
         * Opteron and some Sempron parts): MMX, SSE or 3DNow! are faster
         * than SSE2 often enough to warrant a special-case flag. Both
         * AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are set here, so SSE2
         * code is used unless a caller explicitly opts out by checking
         * AV_CPU_FLAG_SSE2SLOW. */
        if ((flags & AV_CPU_FLAG_SSE2) && !(reg_c & (1 << 6)) &&
            !strncmp(vendor.c, "AuthenticAMD", 12))
            flags |= AV_CPU_FLAG_SSE2SLOW;

        /* XOP and FMA4 use the AVX instruction coding scheme, so they are
         * only reported when AVX (including OS support) was detected. */
        if (flags & AV_CPU_FLAG_AVX) {
            if (reg_c & (1 << 11))
                flags |= AV_CPU_FLAG_XOP;
            if (reg_c & (1 << 16))
                flags |= AV_CPU_FLAG_FMA4;
        }
    }

    if (family == 6 && !strncmp(vendor.c, "GenuineIntel", 12)) {
        switch (model) {
        case 9:  /* pentium-m "banias" */
        case 13: /* pentium-m "dothan" */
        case 14: /* core1 "yonah" */
            /* These theoretically support sse2, but it is usually slower
             * than mmx, so pretend they do not: AV_CPU_FLAG_SSE2 is
             * cleared and AV_CPU_FLAG_SSE2SLOW set, so SSE2 code is only
             * used by callers that explicitly check AV_CPU_FLAG_SSE2SLOW.
             * SSE3 / SSE3SLOW are handled the same way. */
            if (flags & AV_CPU_FLAG_SSE2)
                flags ^= AV_CPU_FLAG_SSE2SLOW | AV_CPU_FLAG_SSE2;
            if (flags & AV_CPU_FLAG_SSE3)
                flags ^= AV_CPU_FLAG_SSE3SLOW | AV_CPU_FLAG_SSE3;
            break;
        case 28:
            /* Atom: SSSE3 is supported and often useful, but some SSSE3
             * routines are slower than their SSE2 equivalents on this
             * core. The flag lets callers disable those selectively. */
            flags |= AV_CPU_FLAG_ATOM;
            break;
        default:
            break;
        }
    }

#endif /* cpuid */

    return flags;
}
|
||||
61
project/jni/ffmpeg/libavutil/x86/cpu.h
Normal file
61
project/jni/ffmpeg/libavutil/x86/cpu.h
Normal file
@@ -0,0 +1,61 @@
|
||||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_X86_CPU_H
|
||||
#define AVUTIL_X86_CPU_H
|
||||
|
||||
#include "config.h"
|
||||
#include "libavutil/cpu.h"
|
||||
|
||||
/*
 * CPUEXT(flags, suffix, cpuext) pastes its arguments into
 *     HAVE_<cpuext><suffix> && ((flags) & AV_CPU_FLAG_<cpuext>)
 * so it is true only when the build-time switch is enabled and the runtime
 * flag is present in flags.
 */
#define CPUEXT(flags, suffix, cpuext) \
(HAVE_ ## cpuext ## suffix && ((flags) & AV_CPU_FLAG_ ## cpuext))

/* Aliases so that the AMD3DNOW / AMD3DNOWEXT names used with CPUEXT below
 * paste into existing AV_CPU_FLAG_* constants. */
#define AV_CPU_FLAG_AMD3DNOW AV_CPU_FLAG_3DNOW
#define AV_CPU_FLAG_AMD3DNOWEXT AV_CPU_FLAG_3DNOWEXT

/* Tests against the HAVE_*_EXTERNAL build switches. */
#define EXTERNAL_AMD3DNOW(flags) CPUEXT(flags, _EXTERNAL, AMD3DNOW)
#define EXTERNAL_AMD3DNOWEXT(flags) CPUEXT(flags, _EXTERNAL, AMD3DNOWEXT)
#define EXTERNAL_MMX(flags) CPUEXT(flags, _EXTERNAL, MMX)
#define EXTERNAL_MMXEXT(flags) CPUEXT(flags, _EXTERNAL, MMXEXT)
#define EXTERNAL_SSE(flags) CPUEXT(flags, _EXTERNAL, SSE)
#define EXTERNAL_SSE2(flags) CPUEXT(flags, _EXTERNAL, SSE2)
#define EXTERNAL_SSE3(flags) CPUEXT(flags, _EXTERNAL, SSE3)
#define EXTERNAL_SSSE3(flags) CPUEXT(flags, _EXTERNAL, SSSE3)
#define EXTERNAL_SSE4(flags) CPUEXT(flags, _EXTERNAL, SSE4)
#define EXTERNAL_SSE42(flags) CPUEXT(flags, _EXTERNAL, SSE42)
#define EXTERNAL_AVX(flags) CPUEXT(flags, _EXTERNAL, AVX)
#define EXTERNAL_FMA4(flags) CPUEXT(flags, _EXTERNAL, FMA4)

/* Tests against the HAVE_*_INLINE build switches. */
#define INLINE_AMD3DNOW(flags) CPUEXT(flags, _INLINE, AMD3DNOW)
#define INLINE_AMD3DNOWEXT(flags) CPUEXT(flags, _INLINE, AMD3DNOWEXT)
#define INLINE_MMX(flags) CPUEXT(flags, _INLINE, MMX)
#define INLINE_MMXEXT(flags) CPUEXT(flags, _INLINE, MMXEXT)
#define INLINE_SSE(flags) CPUEXT(flags, _INLINE, SSE)
#define INLINE_SSE2(flags) CPUEXT(flags, _INLINE, SSE2)
#define INLINE_SSE3(flags) CPUEXT(flags, _INLINE, SSE3)
#define INLINE_SSSE3(flags) CPUEXT(flags, _INLINE, SSSE3)
#define INLINE_SSE4(flags) CPUEXT(flags, _INLINE, SSE4)
#define INLINE_SSE42(flags) CPUEXT(flags, _INLINE, SSE42)
#define INLINE_AVX(flags) CPUEXT(flags, _INLINE, AVX)
#define INLINE_FMA4(flags) CPUEXT(flags, _INLINE, FMA4)
|
||||
|
||||
/**
 * Execute CPUID for leaf index and store the resulting eax, ebx, ecx and
 * edx register values through the four pointers.
 */
void ff_cpu_cpuid(int index, int *eax, int *ebx, int *ecx, int *edx);

/**
 * Execute XGETBV with op in ecx and store the resulting eax and edx
 * register values through the two pointers.
 */
void ff_cpu_xgetbv(int op, int *eax, int *edx);

/**
 * Test whether the CPUID instruction is available.
 *
 * @return 0 if unsupported
 */
int ff_cpu_cpuid_test(void);
|
||||
|
||||
#endif /* AVUTIL_X86_CPU_H */
|
||||
91
project/jni/ffmpeg/libavutil/x86/cpuid.asm
Normal file
91
project/jni/ffmpeg/libavutil/x86/cpuid.asm
Normal file
@@ -0,0 +1,91 @@
|
||||
;*****************************************************************************
|
||||
;* Copyright (C) 2005-2010 x264 project
|
||||
;*
|
||||
;* Authors: Loren Merritt <lorenm@u.washington.edu>
|
||||
;* Jason Garrett-Glaser <darkshikari@gmail.com>
|
||||
;*
|
||||
;* This file is part of FFmpeg.
|
||||
;*
|
||||
;* FFmpeg is free software; you can redistribute it and/or
|
||||
;* modify it under the terms of the GNU Lesser General Public
|
||||
;* License as published by the Free Software Foundation; either
|
||||
;* version 2.1 of the License, or (at your option) any later version.
|
||||
;*
|
||||
;* FFmpeg is distributed in the hope that it will be useful,
|
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;* Lesser General Public License for more details.
|
||||
;*
|
||||
;* You should have received a copy of the GNU Lesser General Public
|
||||
;* License along with FFmpeg; if not, write to the Free Software
|
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
;******************************************************************************
|
||||
|
||||
%include "x86util.asm"
|
||||
|
||||
SECTION .text
|
||||
|
||||
;-----------------------------------------------------------------------------
; void ff_cpu_cpuid(int index, int *eax, int *ebx, int *ecx, int *edx)
;
; Runs CPUID with eax = index and ecx = 0, then writes the resulting
; eax/ebx/ecx/edx through the four pointer arguments.
;-----------------------------------------------------------------------------
cglobal cpu_cpuid, 5,7
    push rbx            ; saved here, restored just before RET
    push r4             ; park the four output pointers on the stack so they
    push r3             ; can be reloaded after cpuid has overwritten
    push r2             ; eax/ebx/ecx/edx
    push r1
    mov eax, r0d        ; requested leaf
    xor ecx, ecx        ; ecx is cleared before the query
    cpuid
    pop r4              ; pointers come back in the order r1, r2, r3, r4 ...
    mov [r4], eax       ; ... i.e. *eax, *ebx, *ecx, *edx
    pop r4
    mov [r4], ebx
    pop r4
    mov [r4], ecx
    pop r4
    mov [r4], edx
    pop rbx
    RET
|
||||
|
||||
;-----------------------------------------------------------------------------
; void ff_cpu_xgetbv(int op, int *eax, int *edx)
;
; Runs XGETBV with ecx = op and writes the resulting eax and edx through
; the two pointer arguments.
;-----------------------------------------------------------------------------
cglobal cpu_xgetbv, 3,7
    push r2             ; park both output pointers on the stack before
    push r1             ; xgetbv writes eax/edx
    mov ecx, r0d        ; ecx = op
    xgetbv
    pop r4              ; first pop: the eax pointer (r1)
    mov [r4], eax
    pop r4              ; second pop: the edx pointer (r2)
    mov [r4], edx
    RET
|
||||
|
||||
%if ARCH_X86_64 == 0
;-----------------------------------------------------------------------------
; int ff_cpu_cpuid_test(void)
; return 0 if unsupported
;
; Tries to flip bit 0x200000 of EFLAGS and returns (in eax) the XOR of the
; flags before and after, so the result is non-zero only if the bit could be
; changed. Only assembled for 32-bit builds.
;-----------------------------------------------------------------------------
cglobal cpu_cpuid_test
    pushfd              ; keep the caller's EFLAGS; restored by the last popfd
    push ebx
    push ebp
    push esi
    push edi
    pushfd
    pop eax             ; eax = current EFLAGS
    mov ebx, eax        ; ebx = reference copy
    xor eax, 0x200000   ; flip the bit under test
    push eax
    popfd               ; attempt to write the modified flags
    pushfd
    pop eax             ; read the flags back
    xor eax, ebx        ; difference between new and original flags
    pop edi
    pop esi
    pop ebp
    pop ebx
    popfd
    ret
%endif
|
||||
170
project/jni/ffmpeg/libavutil/x86/float_dsp.asm
Normal file
170
project/jni/ffmpeg/libavutil/x86/float_dsp.asm
Normal file
@@ -0,0 +1,170 @@
|
||||
;*****************************************************************************
|
||||
;* x86-optimized Float DSP functions
|
||||
;*
|
||||
;* Copyright 2006 Loren Merritt
|
||||
;*
|
||||
;* This file is part of FFmpeg.
|
||||
;*
|
||||
;* FFmpeg is free software; you can redistribute it and/or
|
||||
;* modify it under the terms of the GNU Lesser General Public
|
||||
;* License as published by the Free Software Foundation; either
|
||||
;* version 2.1 of the License, or (at your option) any later version.
|
||||
;*
|
||||
;* FFmpeg is distributed in the hope that it will be useful,
|
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;* Lesser General Public License for more details.
|
||||
;*
|
||||
;* You should have received a copy of the GNU Lesser General Public
|
||||
;* License along with FFmpeg; if not, write to the Free Software
|
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
;******************************************************************************
|
||||
|
||||
%include "x86util.asm"
|
||||
|
||||
SECTION .text
|
||||
|
||||
;-----------------------------------------------------------------------------
; void vector_fmul(float *dst, const float *src0, const float *src1, int len)
;
; dst[i] = src0[i] * src1[i]. The loop walks from the end of the arrays
; towards the start, two vector registers (2*mmsize bytes) per iteration.
; NOTE(review): presumably len must be a positive multiple of 2*mmsize/4 and
; the buffers aligned for mova -- confirm against the float_dsp API docs.
;-----------------------------------------------------------------------------
%macro VECTOR_FMUL 0
cglobal vector_fmul, 4,4,2, dst, src0, src1, len
    lea lenq, [lend*4 - 2*mmsize]   ; byte offset of the last pair of vectors
    ALIGN 16
.loop:
    mova m0, [src0q + lenq]
    mova m1, [src0q + lenq + mmsize]
    mulps m0, m0, [src1q + lenq]
    mulps m1, m1, [src1q + lenq + mmsize]
    mova [dstq + lenq], m0
    mova [dstq + lenq + mmsize], m1

    sub lenq, 2*mmsize
    jge .loop                       ; continue while the offset is >= 0
    REP_RET
%endmacro

; Instantiate the SSE version, and the AVX version when enabled.
INIT_XMM sse
VECTOR_FMUL
%if HAVE_AVX_EXTERNAL
INIT_YMM avx
VECTOR_FMUL
%endif
|
||||
|
||||
;------------------------------------------------------------------------------
; void ff_vector_fmac_scalar(float *dst, const float *src, float mul, int len)
;
; dst[i] += src[i] * mul, processed from the end of the arrays towards the
; start, two vector registers per iteration.
;------------------------------------------------------------------------------

%macro VECTOR_FMAC_SCALAR 0
; On UNIX64 mul is not among the named integer arguments; the code below
; expects it in xmm0.
%if UNIX64
cglobal vector_fmac_scalar, 3,3,3, dst, src, len
%else
cglobal vector_fmac_scalar, 4,4,3, dst, src, mul, len
%endif
; Broadcast mul to every lane of m0.
%if ARCH_X86_32
    VBROADCASTSS m0, mulm           ; mul is read from its argument slot
%else
%if WIN64
    mova xmm0, xmm2                 ; on WIN64 mul is taken from xmm2
%endif
    shufps xmm0, xmm0, 0            ; replicate lane 0 across xmm0
%if cpuflag(avx)
    vinsertf128 m0, m0, xmm0, 1     ; copy the low half into the upper half
%endif
%endif
    lea lenq, [lend*4-2*mmsize]     ; byte offset of the last pair of vectors
.loop:
    mulps m1, m0, [srcq+lenq ]
    mulps m2, m0, [srcq+lenq+mmsize]
    addps m1, m1, [dstq+lenq ]
    addps m2, m2, [dstq+lenq+mmsize]
    mova [dstq+lenq ], m1
    mova [dstq+lenq+mmsize], m2
    sub lenq, 2*mmsize
    jge .loop                       ; continue while the offset is >= 0
    REP_RET
%endmacro

; Instantiate the SSE version, and the AVX version when enabled.
INIT_XMM sse
VECTOR_FMAC_SCALAR
%if HAVE_AVX_EXTERNAL
INIT_YMM avx
VECTOR_FMAC_SCALAR
%endif
|
||||
|
||||
;------------------------------------------------------------------------------
; void ff_vector_fmul_scalar(float *dst, const float *src, float mul, int len)
;
; dst[i] = src[i] * mul, processed from the end of the arrays towards the
; start, one vector register per iteration. Only an SSE version is
; instantiated.
;------------------------------------------------------------------------------

%macro VECTOR_FMUL_SCALAR 0
; On UNIX64 mul is not among the named integer arguments; the code below
; expects it in m0.
%if UNIX64
cglobal vector_fmul_scalar, 3,3,2, dst, src, len
%else
cglobal vector_fmul_scalar, 4,4,3, dst, src, mul, len
%endif
%if ARCH_X86_32
    movss m0, mulm                  ; load mul from its argument slot
%elif WIN64
    SWAP 0, 2                       ; on WIN64 mul is taken from register 2
%endif
    shufps m0, m0, 0                ; replicate lane 0 across m0
    lea lenq, [lend*4-mmsize]       ; byte offset of the last vector
.loop:
    mova m1, [srcq+lenq]
    mulps m1, m0
    mova [dstq+lenq], m1
    sub lenq, mmsize
    jge .loop                       ; continue while the offset is >= 0
    REP_RET
%endmacro

INIT_XMM sse
VECTOR_FMUL_SCALAR
|
||||
|
||||
;------------------------------------------------------------------------------
; void ff_vector_dmul_scalar(double *dst, const double *src, double mul,
; int len)
;
; dst[i] = src[i] * mul for doubles, processed from the end of the arrays
; towards the start, two vector registers per iteration.
;------------------------------------------------------------------------------

%macro VECTOR_DMUL_SCALAR 0
%if ARCH_X86_32
; NOTE(review): the length is loaded from a fifth argument slot (lenaddrm),
; presumably because the 8-byte mul occupies two 4-byte stack slots --
; confirm against the x86inc argument-naming rules.
cglobal vector_dmul_scalar, 3,4,3, dst, src, mul, len, lenaddr
    mov lenq, lenaddrm
%elif UNIX64
cglobal vector_dmul_scalar, 3,3,3, dst, src, len
%else
cglobal vector_dmul_scalar, 4,4,3, dst, src, mul, len
%endif
; Broadcast mul to every lane of m0.
%if ARCH_X86_32
    VBROADCASTSD m0, mulm
%else
%if WIN64
    movlhps xmm2, xmm2              ; duplicate the low double of xmm2
%if cpuflag(avx)
    vinsertf128 ymm2, ymm2, xmm2, 1 ; copy the low half into the upper half
%endif
    SWAP 0, 2                       ; make register 2 the one named m0
%else
    movlhps xmm0, xmm0              ; duplicate the low double of xmm0
%if cpuflag(avx)
    vinsertf128 ymm0, ymm0, xmm0, 1 ; copy the low half into the upper half
%endif
%endif
%endif
    lea lenq, [lend*8-2*mmsize]     ; byte offset of the last pair of vectors
.loop:
    mulpd m1, m0, [srcq+lenq ]
    mulpd m2, m0, [srcq+lenq+mmsize]
    mova [dstq+lenq ], m1
    mova [dstq+lenq+mmsize], m2
    sub lenq, 2*mmsize
    jge .loop                       ; continue while the offset is >= 0
    REP_RET
%endmacro

; Instantiate the SSE2 version, and the AVX version when enabled.
INIT_XMM sse2
VECTOR_DMUL_SCALAR
%if HAVE_AVX_EXTERNAL
INIT_YMM avx
VECTOR_DMUL_SCALAR
%endif
|
||||
60
project/jni/ffmpeg/libavutil/x86/float_dsp_init.c
Normal file
60
project/jni/ffmpeg/libavutil/x86/float_dsp_init.c
Normal file
@@ -0,0 +1,60 @@
|
||||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/float_dsp.h"
|
||||
#include "cpu.h"
|
||||
|
||||
/* Prototypes of the assembly routines installed by ff_float_dsp_init_x86(). */

/* dst = src0 * src1, element-wise */
void ff_vector_fmul_sse(float *dst, const float *src0, const float *src1,
                        int len);
void ff_vector_fmul_avx(float *dst, const float *src0, const float *src1,
                        int len);

/* dst += src * mul */
void ff_vector_fmac_scalar_sse(float *dst, const float *src, float mul,
                               int len);
void ff_vector_fmac_scalar_avx(float *dst, const float *src, float mul,
                               int len);

/* dst = src * mul */
void ff_vector_fmul_scalar_sse(float *dst, const float *src, float mul,
                               int len);

/* dst = src * mul, double precision */
void ff_vector_dmul_scalar_sse2(double *dst, const double *src,
                                double mul, int len);
void ff_vector_dmul_scalar_avx(double *dst, const double *src,
                               double mul, int len);
|
||||
|
||||
/**
 * Install the x86 assembly routines supported by the running CPU.
 *
 * The checks run from the oldest to the newest instruction set, so an AVX
 * routine replaces the SSE/SSE2 pointer assigned before it. Function
 * pointers for which no check matches are left as the caller set them.
 *
 * @param fdsp context whose function pointers are updated
 */
void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
{
    const int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_SSE(cpu_flags)) {
        fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_sse;
        fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_sse;
        fdsp->vector_fmul        = ff_vector_fmul_sse;
    }

    if (EXTERNAL_SSE2(cpu_flags))
        fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2;

    /* No AVX variant of vector_fmul_scalar is declared in this file, so
     * that pointer is not touched here. */
    if (EXTERNAL_AVX(cpu_flags)) {
        fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_avx;
        fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_avx;
        fdsp->vector_fmul        = ff_vector_fmul_avx;
    }
}
|
||||
97
project/jni/ffmpeg/libavutil/x86/intreadwrite.h
Normal file
97
project/jni/ffmpeg/libavutil/x86/intreadwrite.h
Normal file
@@ -0,0 +1,97 @@
|
||||
/*
|
||||
* Copyright (c) 2010 Alexander Strange <astrange@ithinksw.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_X86_INTREADWRITE_H
|
||||
#define AVUTIL_X86_INTREADWRITE_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "config.h"
|
||||
#include "libavutil/attributes.h"
|
||||
|
||||
#if HAVE_MMX
|
||||
|
||||
#if !HAVE_FAST_64BIT && defined(__MMX__)
|
||||
|
||||
#define AV_COPY64 AV_COPY64
|
||||
static av_always_inline void AV_COPY64(void *d, const void *s)
|
||||
{
|
||||
__asm__("movq %1, %%mm0 \n\t"
|
||||
"movq %%mm0, %0 \n\t"
|
||||
: "=m"(*(uint64_t*)d)
|
||||
: "m" (*(const uint64_t*)s)
|
||||
: "mm0");
|
||||
}
|
||||
|
||||
#define AV_SWAP64 AV_SWAP64
|
||||
static av_always_inline void AV_SWAP64(void *a, void *b)
|
||||
{
|
||||
__asm__("movq %1, %%mm0 \n\t"
|
||||
"movq %0, %%mm1 \n\t"
|
||||
"movq %%mm0, %0 \n\t"
|
||||
"movq %%mm1, %1 \n\t"
|
||||
: "+m"(*(uint64_t*)a), "+m"(*(uint64_t*)b)
|
||||
::"mm0", "mm1");
|
||||
}
|
||||
|
||||
#define AV_ZERO64 AV_ZERO64
|
||||
static av_always_inline void AV_ZERO64(void *d)
|
||||
{
|
||||
__asm__("pxor %%mm0, %%mm0 \n\t"
|
||||
"movq %%mm0, %0 \n\t"
|
||||
: "=m"(*(uint64_t*)d)
|
||||
:: "mm0");
|
||||
}
|
||||
|
||||
#endif /* !HAVE_FAST_64BIT && defined(__MMX__) */
|
||||
|
||||
#ifdef __SSE__
|
||||
|
||||
#define AV_COPY128 AV_COPY128
|
||||
static av_always_inline void AV_COPY128(void *d, const void *s)
|
||||
{
|
||||
struct v {uint64_t v[2];};
|
||||
|
||||
__asm__("movaps %1, %%xmm0 \n\t"
|
||||
"movaps %%xmm0, %0 \n\t"
|
||||
: "=m"(*(struct v*)d)
|
||||
: "m" (*(const struct v*)s)
|
||||
: "xmm0");
|
||||
}
|
||||
|
||||
#endif /* __SSE__ */
|
||||
|
||||
#ifdef __SSE2__
|
||||
|
||||
#define AV_ZERO128 AV_ZERO128
|
||||
static av_always_inline void AV_ZERO128(void *d)
|
||||
{
|
||||
struct v {uint64_t v[2];};
|
||||
|
||||
__asm__("pxor %%xmm0, %%xmm0 \n\t"
|
||||
"movdqa %%xmm0, %0 \n\t"
|
||||
: "=m"(*(struct v*)d)
|
||||
:: "xmm0");
|
||||
}
|
||||
|
||||
#endif /* __SSE2__ */
|
||||
|
||||
#endif /* HAVE_MMX */
|
||||
|
||||
#endif /* AVUTIL_X86_INTREADWRITE_H */
|
||||
43
project/jni/ffmpeg/libavutil/x86/timer.h
Normal file
43
project/jni/ffmpeg/libavutil/x86/timer.h
Normal file
@@ -0,0 +1,43 @@
|
||||
/*
|
||||
* copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_X86_TIMER_H
|
||||
#define AVUTIL_X86_TIMER_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/*
 * Select the AV_READ_TIME implementation:
 *  - with inline asm available, use read_time() below;
 *  - otherwise, if HAVE_RDTSC is set, map it to __rdtsc (expected to be
 *    supplied by the toolchain -- it is not declared in this header);
 *  - if neither is set, AV_READ_TIME is left undefined here.
 */
#if HAVE_INLINE_ASM
|
||||
|
||||
#define AV_READ_TIME read_time
|
||||
|
||||
/*
 * Read the CPU time-stamp counter.
 *
 * RDTSC returns the counter's low 32 bits in EAX and high 32 bits in EDX;
 * the two halves are recombined into one 64-bit value.
 *
 * @return the 64-bit time-stamp counter value
 */
static inline uint64_t read_time(void)
|
||||
{
|
||||
uint32_t a, d;
|
||||
__asm__ volatile("rdtsc" : "=a" (a), "=d" (d));
|
||||
return ((uint64_t)d << 32) + a;
|
||||
}
|
||||
|
||||
#elif HAVE_RDTSC
|
||||
|
||||
#define AV_READ_TIME __rdtsc
|
||||
|
||||
#endif /* HAVE_INLINE_ASM */
|
||||
|
||||
#endif /* AVUTIL_X86_TIMER_H */
|
||||
73
project/jni/ffmpeg/libavutil/x86/w64xmmtest.h
Normal file
73
project/jni/ffmpeg/libavutil/x86/w64xmmtest.h
Normal file
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
* check XMM registers for clobbers on Win64
|
||||
* Copyright (c) 2008 Ramiro Polla <ramiro.polla@gmail.com>
|
||||
*
|
||||
* This file is part of Libav.
|
||||
*
|
||||
* Libav is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* Libav is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with Libav; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdarg.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "libavutil/bswap.h"
|
||||
|
||||
/*
 * Dump registers xmm6..xmm15 to the buffer at mem.
 *
 * Ten 16-byte unaligned stores (movups) are issued at offsets 0x00..0x90,
 * so mem must point to at least 160 writable bytes; no alignment is
 * required.  mem is passed in a general-purpose register ("r") and the
 * "memory" clobber tells the compiler the buffer is written.
 * xmm8..xmm15 only exist in 64-bit mode, so this only assembles for x86-64.
 */
#define storexmmregs(mem) \
|
||||
__asm__ volatile( \
|
||||
"movups %%xmm6 , 0x00(%0)\n\t" \
|
||||
"movups %%xmm7 , 0x10(%0)\n\t" \
|
||||
"movups %%xmm8 , 0x20(%0)\n\t" \
|
||||
"movups %%xmm9 , 0x30(%0)\n\t" \
|
||||
"movups %%xmm10, 0x40(%0)\n\t" \
|
||||
"movups %%xmm11, 0x50(%0)\n\t" \
|
||||
"movups %%xmm12, 0x60(%0)\n\t" \
|
||||
"movups %%xmm13, 0x70(%0)\n\t" \
|
||||
"movups %%xmm14, 0x80(%0)\n\t" \
|
||||
"movups %%xmm15, 0x90(%0)\n\t" \
|
||||
:: "r"(mem) : "memory")
|
||||
|
||||
/*
 * Body of a wrapper function that calls __real_<func>(ctx, ...) and checks
 * that xmm6..xmm15 are unchanged across the call.
 *
 * The expansion declares two locals (xmm, ret), snapshots the registers
 * with storexmmregs() before and after the call, and compares the two
 * snapshots with memcmp().  On a mismatch it logs the function name and,
 * for every register whose value changed, the old and new contents through
 * av_log(ctx, AV_LOG_ERROR, ...), then calls abort().  Otherwise it
 * returns the wrapped function's int result.
 *
 * Notes:
 *  - the register number printed is 6 + i, matching the xmm6..xmm15 dump
 *    order of storexmmregs();
 *  - each 64-bit half is passed through av_bswap64() before printing
 *    (presumably so the hex dump shows the bytes in memory order on
 *    little-endian x86 -- confirm);
 *  - the expansion ends with "return ret" without a semicolon, so the
 *    caller supplies it;
 *  - because it declares variables and returns, it is meant to be the
 *    whole body of an int-returning function;
 *  - av_log and AV_LOG_ERROR are not provided by the headers included in
 *    this file; the including translation unit must supply them.
 */
#define testxmmclobbers(func, ctx, ...) \
|
||||
uint64_t xmm[2][10][2]; \
|
||||
int ret; \
|
||||
storexmmregs(xmm[0]); \
|
||||
ret = __real_ ## func(ctx, __VA_ARGS__); \
|
||||
storexmmregs(xmm[1]); \
|
||||
if (memcmp(xmm[0], xmm[1], sizeof(xmm[0]))) { \
|
||||
int i; \
|
||||
av_log(ctx, AV_LOG_ERROR, \
|
||||
"XMM REGS CLOBBERED IN %s!\n", #func); \
|
||||
for (i = 0; i < 10; i ++) \
|
||||
if (xmm[0][i][0] != xmm[1][i][0] || \
|
||||
xmm[0][i][1] != xmm[1][i][1]) { \
|
||||
av_log(ctx, AV_LOG_ERROR, \
|
||||
"xmm%-2d = %016"PRIx64"%016"PRIx64"\n", \
|
||||
6 + i, av_bswap64(xmm[0][i][0]), \
|
||||
av_bswap64(xmm[0][i][1])); \
|
||||
av_log(ctx, AV_LOG_ERROR, \
|
||||
" -> %016"PRIx64"%016"PRIx64"\n", \
|
||||
av_bswap64(xmm[1][i][0]), \
|
||||
av_bswap64(xmm[1][i][1])); \
|
||||
} \
|
||||
abort(); \
|
||||
} \
|
||||
return ret
|
||||
|
||||
/*
 * Emit the declarations for a wrapped function and open the definition of
 * its wrapper.
 *
 * "##" pastes the __real_ / __wrap_ prefix onto the first token of func,
 * so the expansion is: a declaration of int __real_<func>, a declaration
 * of int __wrap_<func>, and a trailing "int __wrap_<func" that the caller
 * completes.
 *
 * NOTE(review): presumably used as  wrap(name(params)) { ... }  with the
 * full parameter list inside the macro argument, and with the linker
 * redirecting calls (the __real_/__wrap_ names match GNU ld's --wrap
 * convention) -- confirm at the call sites and in the build flags.
 */
#define wrap(func) \
|
||||
int __real_ ## func; \
|
||||
int __wrap_ ## func; \
|
||||
int __wrap_ ## func
|
||||
1296
project/jni/ffmpeg/libavutil/x86/x86inc.asm
Normal file
1296
project/jni/ffmpeg/libavutil/x86/x86inc.asm
Normal file
File diff suppressed because it is too large
Load Diff
667
project/jni/ffmpeg/libavutil/x86/x86util.asm
Normal file
667
project/jni/ffmpeg/libavutil/x86/x86util.asm
Normal file
@@ -0,0 +1,667 @@
|
||||
;*****************************************************************************
|
||||
;* x86util.asm
|
||||
;*****************************************************************************
|
||||
;* Copyright (C) 2008-2010 x264 project
|
||||
;*
|
||||
;* Authors: Loren Merritt <lorenm@u.washington.edu>
|
||||
;* Holger Lubitz <holger@lubitz.org>
|
||||
;*
|
||||
;* This file is part of FFmpeg.
|
||||
;*
|
||||
;* FFmpeg is free software; you can redistribute it and/or
|
||||
;* modify it under the terms of the GNU Lesser General Public
|
||||
;* License as published by the Free Software Foundation; either
|
||||
;* version 2.1 of the License, or (at your option) any later version.
|
||||
;*
|
||||
;* FFmpeg is distributed in the hope that it will be useful,
|
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;* Lesser General Public License for more details.
|
||||
;*
|
||||
;* You should have received a copy of the GNU Lesser General Public
|
||||
;* License along with FFmpeg; if not, write to the Free Software
|
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
;******************************************************************************
|
||||
|
||||
; Configuration consumed by x86inc.asm (included below; its contents are not
; visible here).
; program_name: presumably the prefix x86inc.asm applies to exported
; symbols -- confirm in x86inc.asm.
%define program_name ff
|
||||
; Alias so that cpuflag(mmxext) tests in this file resolve to the mmx2 flag
; (assumes x86inc.asm defines cpuflags_mmx2 -- confirm).
%define cpuflags_mmxext cpuflags_mmx2
|
||||
|
||||
%include "libavutil/x86/x86inc.asm"
|
||||
|
||||
; SBUTTERFLY size, a, b, tmp
; Interleave the elements of registers m<a> and m<b>.
;   %1 = punpck suffix selecting the element width (bw, wd, dq, qdq)
;   %2 = register number a, %3 = register number b, %4 = scratch register
; Computes punpckl(a, b) into m%2 and punpckh(a, b) into m%4, then SWAPs the
; names %3/%4 (SWAP comes from x86inc.asm, not shown; presumably an
; assemble-time renaming) so the high interleave is addressed as m%3 and the
; old b becomes the scratch m%4.
; The AVX path uses the non-destructive 3-operand form and needs no copy.
%macro SBUTTERFLY 4
|
||||
%if avx_enabled == 0
|
||||
mova m%4, m%2
|
||||
punpckl%1 m%2, m%3
|
||||
punpckh%1 m%4, m%3
|
||||
%else
|
||||
punpckh%1 m%4, m%2, m%3
|
||||
punpckl%1 m%2, m%3
|
||||
%endif
|
||||
SWAP %3, %4
|
||||
%endmacro
|
||||
|
||||
; SBUTTERFLY2 size, a, b, tmp
; Same interleave as SBUTTERFLY but written only with the 3-operand form:
; punpckl(a, b) goes to m%4 and punpckh(a, b) overwrites m%2, then a
; three-way SWAP renames the registers (SWAP is from x86inc.asm, not shown;
; presumably leaving low in %2 and high in %3 -- confirm).
; Relies on x86inc.asm accepting 3-operand syntax for non-AVX builds.
%macro SBUTTERFLY2 4
|
||||
punpckl%1 m%4, m%2, m%3
|
||||
punpckh%1 m%2, m%2, m%3
|
||||
SWAP %2, %4, %3
|
||||
%endmacro
|
||||
|
||||
; SBUTTERFLYPS a, b, tmp
; Single-precision float counterpart of SBUTTERFLY2: unpcklps(a, b) goes to
; m%3 and unpckhps(a, b) overwrites m%1, followed by a three-way SWAP
; renaming (SWAP is from x86inc.asm, not shown).
%macro SBUTTERFLYPS 3
|
||||
unpcklps m%3, m%1, m%2
|
||||
unpckhps m%1, m%1, m%2
|
||||
SWAP %1, %3, %2
|
||||
%endmacro
|
||||
|
||||
; TRANSPOSE4x4B r0, r1, r2, r3, tmp
; Byte transpose across registers %1..%4 built from two rounds of
; SBUTTERFLY (byte->word, then word->dword interleaves); %5 is scratch.
; The final SWAP puts the two middle rows back in order.
%macro TRANSPOSE4x4B 5
|
||||
SBUTTERFLY bw, %1, %2, %5
|
||||
SBUTTERFLY bw, %3, %4, %5
|
||||
SBUTTERFLY wd, %1, %3, %5
|
||||
SBUTTERFLY wd, %2, %4, %5
|
||||
SWAP %2, %3
|
||||
%endmacro
|
||||
|
||||
; TRANSPOSE4x4W r0, r1, r2, r3, tmp
; Word transpose across registers %1..%4 built from two rounds of
; SBUTTERFLY (word->dword, then dword->qword interleaves); %5 is scratch.
; The final SWAP puts the two middle rows back in order.
%macro TRANSPOSE4x4W 5
|
||||
SBUTTERFLY wd, %1, %2, %5
|
||||
SBUTTERFLY wd, %3, %4, %5
|
||||
SBUTTERFLY dq, %1, %3, %5
|
||||
SBUTTERFLY dq, %2, %4, %5
|
||||
SWAP %2, %3
|
||||
%endmacro
|
||||
|
||||
; TRANSPOSE2x4x4W r0, r1, r2, r3, tmp
; Runs the same wd/dq SBUTTERFLY rounds as TRANSPOSE4x4W and then a third
; round of qword (qdq) interleaves instead of the final row SWAP; %5 is
; scratch.  Judging by the name this transposes two 4x4 word blocks held
; side by side in 128-bit registers -- confirm against callers.
%macro TRANSPOSE2x4x4W 5
|
||||
SBUTTERFLY wd, %1, %2, %5
|
||||
SBUTTERFLY wd, %3, %4, %5
|
||||
SBUTTERFLY dq, %1, %3, %5
|
||||
SBUTTERFLY dq, %2, %4, %5
|
||||
SBUTTERFLY qdq, %1, %2, %5
|
||||
SBUTTERFLY qdq, %3, %4, %5
|
||||
%endmacro
|
||||
|
||||
; TRANSPOSE4x4D r0, r1, r2, r3, tmp
; Dword transpose across registers %1..%4 built from two rounds of
; SBUTTERFLY (dword->qword, then qword->dqword interleaves); %5 is scratch.
; The final SWAP puts the two middle rows back in order.
%macro TRANSPOSE4x4D 5
|
||||
SBUTTERFLY dq, %1, %2, %5
|
||||
SBUTTERFLY dq, %3, %4, %5
|
||||
SBUTTERFLY qdq, %1, %3, %5
|
||||
SBUTTERFLY qdq, %2, %4, %5
|
||||
SWAP %2, %3
|
||||
%endmacro
|
||||
|
||||
; TRANSPOSE4x4PS r0, r1, r2, r3, tmp
; identical behavior to TRANSPOSE4x4D, but using SSE1 float ops
; (unpcklps/unpckhps via SBUTTERFLYPS, then movlhps/movhlps to combine the
; 64-bit halves).  %5 is scratch; the SWAPs (x86inc.asm, not shown) rename
; registers so the results end up addressed as %1..%4.
%macro TRANSPOSE4x4PS 5
|
||||
SBUTTERFLYPS %1, %2, %5
|
||||
SBUTTERFLYPS %3, %4, %5
|
||||
movlhps m%5, m%1, m%3
|
||||
movhlps m%3, m%1
|
||||
SWAP %5, %1
|
||||
movlhps m%5, m%2, m%4
|
||||
movhlps m%4, m%2
|
||||
SWAP %5, %2, %3
|
||||
%endmacro
|
||||
|
||||
; TRANSPOSE8x8W r0..r7, tmp/spill0 [, spill1 [, flag]]
; 8x8 word transpose across registers %1..%8 using three rounds of
; SBUTTERFLY (wd, dq, qdq) followed by two row SWAPs.
;   x86-64: %9 is a scratch register number; %10/%11 are unused.
;   x86-32: there is no spare register, so %9 and %10 are memory operands
;           used as spill slots and the SBUTTERFLY scratch is borrowed from
;           whichever row is currently spilled.  The presence of an 11th
;           argument changes where one input/output row lives (see the
;           in/out notes inside the %else branch).
%macro TRANSPOSE8x8W 9-11
|
||||
%if ARCH_X86_64
|
||||
SBUTTERFLY wd, %1, %2, %9
|
||||
SBUTTERFLY wd, %3, %4, %9
|
||||
SBUTTERFLY wd, %5, %6, %9
|
||||
SBUTTERFLY wd, %7, %8, %9
|
||||
SBUTTERFLY dq, %1, %3, %9
|
||||
SBUTTERFLY dq, %2, %4, %9
|
||||
SBUTTERFLY dq, %5, %7, %9
|
||||
SBUTTERFLY dq, %6, %8, %9
|
||||
SBUTTERFLY qdq, %1, %5, %9
|
||||
SBUTTERFLY qdq, %2, %6, %9
|
||||
SBUTTERFLY qdq, %3, %7, %9
|
||||
SBUTTERFLY qdq, %4, %8, %9
|
||||
SWAP %2, %5
|
||||
SWAP %4, %7
|
||||
%else
|
||||
; in: m0..m7, unless %11 in which case m6 is in %9
|
||||
; out: m0..m7, unless %11 in which case m4 is in %10
|
||||
; spills into %9 and %10
|
||||
%if %0<11
|
||||
movdqa %9, m%7
|
||||
%endif
|
||||
SBUTTERFLY wd, %1, %2, %7
|
||||
movdqa %10, m%2
|
||||
movdqa m%7, %9
|
||||
SBUTTERFLY wd, %3, %4, %2
|
||||
SBUTTERFLY wd, %5, %6, %2
|
||||
SBUTTERFLY wd, %7, %8, %2
|
||||
SBUTTERFLY dq, %1, %3, %2
|
||||
movdqa %9, m%3
|
||||
movdqa m%2, %10
|
||||
SBUTTERFLY dq, %2, %4, %3
|
||||
SBUTTERFLY dq, %5, %7, %3
|
||||
SBUTTERFLY dq, %6, %8, %3
|
||||
SBUTTERFLY qdq, %1, %5, %3
|
||||
SBUTTERFLY qdq, %2, %6, %3
|
||||
movdqa %10, m%2
|
||||
movdqa m%3, %9
|
||||
SBUTTERFLY qdq, %3, %7, %2
|
||||
SBUTTERFLY qdq, %4, %8, %2
|
||||
SWAP %2, %5
|
||||
SWAP %4, %7
|
||||
%if %0<11
|
||||
movdqa m%5, %10
|
||||
%endif
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
; PABSW dst, src -- per-word absolute value of src into dst.
; PABSW macro assumes %1 != %2, while ABS1/2 macros work in-place
;   ssse3 : single pabsw.
;   mmxext: dst = 0 - src, then dst = max(dst, src) (signed).
;   else  : dst = mask of negative words in src; src = (src ^ mask) - mask;
;           then SWAP renames the registers so the result is addressed as
;           %1.  Note that this path overwrites src.
%macro PABSW 2
|
||||
%if cpuflag(ssse3)
|
||||
pabsw %1, %2
|
||||
%elif cpuflag(mmxext)
|
||||
pxor %1, %1
|
||||
psubw %1, %2
|
||||
pmaxsw %1, %2
|
||||
%else
|
||||
pxor %1, %1
|
||||
pcmpgtw %1, %2
|
||||
pxor %2, %1
|
||||
psubw %2, %1
|
||||
SWAP %1, %2
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
; PSIGNW_MMX a, mask
; Computes a = (a ^ mask) - mask per word.  When each word of mask is
; either 0 or -1 this negates the words of a where mask is -1 and leaves
; the others unchanged.
%macro PSIGNW_MMX 2
|
||||
pxor %1, %2
|
||||
psubw %1, %2
|
||||
%endmacro
|
||||
|
||||
; PSIGNW_SSSE3 a, b
; Thin wrapper around the SSSE3 psignw instruction: each word of a is
; negated, zeroed or kept according to the sign of the matching word in b.
%macro PSIGNW_SSSE3 2
|
||||
psignw %1, %2
|
||||
%endmacro
|
||||
|
||||
; In-place per-word absolute value using only baseline MMX instructions:
; tmp = mask of negative words (0 > a), then a = (a ^ tmp) - tmp.
%macro ABS1_MMX 2 ; a, tmp
|
||||
pxor %2, %2
|
||||
pcmpgtw %2, %1
|
||||
pxor %1, %2
|
||||
psubw %1, %2
|
||||
%endmacro
|
||||
|
||||
; Two interleaved ABS1_MMX operations: in-place per-word absolute value of
; a (with tmp0) and b (with tmp1).
%macro ABS2_MMX 4 ; a, b, tmp0, tmp1
|
||||
pxor %3, %3
|
||||
pxor %4, %4
|
||||
pcmpgtw %3, %1
|
||||
pcmpgtw %4, %2
|
||||
pxor %1, %3
|
||||
pxor %2, %4
|
||||
psubw %1, %3
|
||||
psubw %2, %4
|
||||
%endmacro
|
||||
|
||||
; In-place per-word absolute value using pmaxsw:
; tmp = 0 - a, then a = max(a, tmp) (signed).
%macro ABS1_MMXEXT 2 ; a, tmp
|
||||
pxor %2, %2
|
||||
psubw %2, %1
|
||||
pmaxsw %1, %2
|
||||
%endmacro
|
||||
|
||||
; Two interleaved ABS1_MMXEXT operations: in-place per-word absolute value
; of a (with tmp0) and b (with tmp1).
%macro ABS2_MMXEXT 4 ; a, b, tmp0, tmp1
|
||||
pxor %3, %3
|
||||
pxor %4, %4
|
||||
psubw %3, %1
|
||||
psubw %4, %2
|
||||
pmaxsw %1, %3
|
||||
pmaxsw %2, %4
|
||||
%endmacro
|
||||
|
||||
; ABS1_SSSE3 a, tmp
; In-place per-word absolute value with a single pabsw.  The second
; argument is not used; it keeps the signature parallel to ABS1_MMX.
%macro ABS1_SSSE3 2
|
||||
pabsw %1, %1
|
||||
%endmacro
|
||||
|
||||
; ABS2_SSSE3 a, b, tmp0, tmp1
; In-place per-word absolute value of a and b with pabsw.  The two tmp
; arguments are not used; they keep the signature parallel to ABS2_MMX.
%macro ABS2_SSSE3 4
|
||||
pabsw %1, %1
|
||||
pabsw %2, %2
|
||||
%endmacro
|
||||
|
||||
; ABSB_MMX a, tmp
; In-place per-byte absolute value: tmp = 0 - a, then a = unsigned
; min(a, tmp).  Viewed as unsigned bytes, the smaller of x and -x is |x|.
; NOTE(review): pminub on MMX registers is an SSE/MMXEXT-era instruction,
; not baseline MMX, despite the macro's name -- confirm callers only use
; this where it is available.
%macro ABSB_MMX 2
|
||||
pxor %2, %2
|
||||
psubb %2, %1
|
||||
pminub %1, %2
|
||||
%endmacro
|
||||
|
||||
; ABSB2_MMX a, b, tmp0, tmp1
; Two interleaved ABSB_MMX operations: in-place per-byte absolute value of
; a (with tmp0) and b (with tmp1) via 0 - x and unsigned min.
%macro ABSB2_MMX 4
|
||||
pxor %3, %3
|
||||
pxor %4, %4
|
||||
psubb %3, %1
|
||||
psubb %4, %2
|
||||
pminub %1, %3
|
||||
pminub %2, %4
|
||||
%endmacro
|
||||
|
||||
; ABSD2_MMX a, b, tmp0, tmp1
; In-place per-dword absolute value of a and b: each tmp becomes the mask
; of negative dwords (0 > x), then x = (x ^ mask) - mask.
%macro ABSD2_MMX 4
|
||||
pxor %3, %3
|
||||
pxor %4, %4
|
||||
pcmpgtd %3, %1
|
||||
pcmpgtd %4, %2
|
||||
pxor %1, %3
|
||||
pxor %2, %4
|
||||
psubd %1, %3
|
||||
psubd %2, %4
|
||||
%endmacro
|
||||
|
||||
; ABSB_SSSE3 a, tmp
; In-place per-byte absolute value with a single pabsb.  The second
; argument is not used; it keeps the signature parallel to ABSB_MMX.
%macro ABSB_SSSE3 2
|
||||
pabsb %1, %1
|
||||
%endmacro
|
||||
|
||||
; ABSB2_SSSE3 a, b, tmp0, tmp1
; In-place per-byte absolute value of a and b with pabsb.  The two tmp
; arguments are not used; they keep the signature parallel to ABSB2_MMX.
%macro ABSB2_SSSE3 4
|
||||
pabsb %1, %1
|
||||
pabsb %2, %2
|
||||
%endmacro
|
||||
|
||||
; ABS4 a, b, c, d, tmp0, tmp1
; In-place per-word absolute value of four registers: applies whatever ABS2
; is currently %define'd to, first to (a, b) and then to (c, d), reusing
; the same two temporaries.
%macro ABS4 6
|
||||
ABS2 %1, %2, %5, %6
|
||||
ABS2 %3, %4, %5, %6
|
||||
%endmacro
|
||||
|
||||
; Default bindings of the generic ABS* names to the MMX variants above.
; Presumably including files re-%define these to the MMXEXT/SSSE3 variants
; before instantiating code for those instruction sets -- confirm at the
; use sites.
%define ABS1 ABS1_MMX
|
||||
%define ABS2 ABS2_MMX
|
||||
%define ABSB ABSB_MMX
|
||||
%define ABSB2 ABSB2_MMX
|
||||
|
||||
; SPLATB_LOAD dst, addr, shuffle_mask
; Load the byte at [addr] and broadcast it across dst.
; The load is a 4-byte movd from [addr-3], so the wanted byte arrives as
; byte 3 of the dword.
;   ssse3: pshufb with the caller-supplied mask %3 (the mask therefore has
;          to select byte 3 for every lane -- confirm at the call sites).
;   else : punpcklbw doubles every byte into a word, making the wanted byte
;          word 3, which SPLATW then broadcasts; %3 is unused on this path.
%macro SPLATB_LOAD 3
|
||||
%if cpuflag(ssse3)
|
||||
movd %1, [%2-3]
|
||||
pshufb %1, %3
|
||||
%else
|
||||
movd %1, [%2-3] ;to avoid crossing a cacheline
|
||||
punpcklbw %1, %1
|
||||
SPLATW %1, %1, 3
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
; SPLATB_REG dst, gpr, shuffle_mask
; Broadcast the low byte of a general-purpose register across dst.
; "%2d" appends a d suffix to the register name to form its 32-bit alias
; (x86inc.asm naming, e.g. r0 -> r0d).
;   ssse3: pshufb with the caller-supplied mask %3 (expected to select
;          byte 0 for every lane -- confirm at the call sites).
;   else : punpcklbw doubles every byte into a word, then SPLATW broadcasts
;          word 0; %3 is unused on this path.
%macro SPLATB_REG 3
|
||||
%if cpuflag(ssse3)
|
||||
movd %1, %2d
|
||||
pshufb %1, %3
|
||||
%else
|
||||
movd %1, %2d
|
||||
punpcklbw %1, %1
|
||||
SPLATW %1, %1, 0
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
; PALIGNR [dst,] src1, src2, imm, tmp
; palignr with an MMX/SSE2 shift-and-or fallback.  Takes 4 arguments
; (in-place on src1) or 5 arguments (separate dst).
;   ssse3: emits palignr directly; the trailing tmp argument is not used.
;          The 5-argument case uses the 3-operand form, which relies on
;          x86inc.asm's handling of that syntax (not shown).
;   mmx  : dst gets src1 (copied only when they differ); %rotate 1 then
;          drops the dst argument so %1..%4 = src1, src2, imm, tmp in both
;          cases.  tmp = src2; dst is shifted left by (register size - imm)
;          bytes, tmp is shifted right by imm bytes, and the two are OR'ed.
;          mmsize (x86inc.asm) selects 64-bit bit shifts vs. 128-bit byte
;          shifts.
%macro PALIGNR 4-5
|
||||
%if cpuflag(ssse3)
|
||||
%if %0==5
|
||||
palignr %1, %2, %3, %4
|
||||
%else
|
||||
palignr %1, %2, %3
|
||||
%endif
|
||||
%elif cpuflag(mmx) ; [dst,] src1, src2, imm, tmp
|
||||
%define %%dst %1
|
||||
%if %0==5
|
||||
%ifnidn %1, %2
|
||||
mova %%dst, %2
|
||||
%endif
|
||||
%rotate 1
|
||||
%endif
|
||||
%ifnidn %4, %2
|
||||
mova %4, %2
|
||||
%endif
|
||||
%if mmsize==8
|
||||
psllq %%dst, (8-%3)*8
|
||||
psrlq %4, %3*8
|
||||
%else
|
||||
pslldq %%dst, 16-%3
|
||||
psrldq %4, %3
|
||||
%endif
|
||||
por %%dst, %4
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
; PSHUFLW operands...
; Word shuffle that adapts to the current register size: pshufw when
; mmsize is 8 (MMX registers), pshuflw otherwise.  All arguments are
; forwarded unchanged (1+ collects them into %1).
%macro PSHUFLW 1+
|
||||
%if mmsize == 8
|
||||
pshufw %1
|
||||
%else
|
||||
pshuflw %1
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
; PSWAPD dst, src
; Swap the two dwords of a 64-bit MMX value.
;   mmxext  : pshufw with the q1032 selector (constant presumably defined
;             in x86inc.asm).
;   3dnowext: native pswapd.
;   3dnow   : dst = src; dst >>= 32; punpckldq dst, src.  The final
;             punpckldq re-reads src, so this path needs dst != src.
; If none of the three flags is set the macro emits nothing.
%macro PSWAPD 2
|
||||
%if cpuflag(mmxext)
|
||||
pshufw %1, %2, q1032
|
||||
%elif cpuflag(3dnowext)
|
||||
pswapd %1, %2
|
||||
%elif cpuflag(3dnow)
|
||||
movq %1, %2
|
||||
psrlq %1, 32
|
||||
punpckldq %1, %2
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
; De-interleave the bytes of two registers into masked and shifted halves.
; On exit m%1 = m%2 & mask and m%3 = m%4 & mask (computed from the original
; values), while m%2 and m%4 are shifted right by 8 bits per word.
; %5 is either a register number already holding the mask, or any other
; operand from which the mask is first loaded into m%1.
; The mask contents are not visible here; presumably 0x00ff per word so
; that %1/%3 get the even bytes and %2/%4 the odd bytes -- confirm.
%macro DEINTB 5 ; mask, reg1, mask, reg2, optional src to fill masks from
|
||||
%ifnum %5
|
||||
pand m%3, m%5, m%4 ; src .. y6 .. y4
|
||||
pand m%1, m%5, m%2 ; dst .. y6 .. y4
|
||||
%else
|
||||
mova m%1, %5
|
||||
pand m%3, m%1, m%4 ; src .. y6 .. y4
|
||||
pand m%1, m%1, m%2 ; dst .. y6 .. y4
|
||||
%endif
|
||||
psrlw m%2, 8 ; dst .. y7 .. y5
|
||||
psrlw m%4, 8 ; src .. y7 .. y5
|
||||
%endmacro
|
||||
|
||||
; SUMSUB_BA size, a, b [, tmp]
; Butterfly: on exit a = a + b and b = b - a (old values), per element of
; width %1 (the padd/psub suffix).
;   3 args       : no scratch; a += b; b += b; b -= a  => b = 2b - (a+b).
;   4 args, SSE  : tmp = a; a += b; b -= tmp.
;   4 args, AVX  : tmp = a + b (3-operand); b -= a; then SWAP renames tmp
;                  to a (SWAP is from x86inc.asm, not shown).
%macro SUMSUB_BA 3-4
|
||||
%if %0==3
|
||||
padd%1 m%2, m%3
|
||||
padd%1 m%3, m%3
|
||||
psub%1 m%3, m%2
|
||||
%else
|
||||
%if avx_enabled == 0
|
||||
mova m%4, m%2
|
||||
padd%1 m%2, m%3
|
||||
psub%1 m%3, m%4
|
||||
%else
|
||||
padd%1 m%4, m%2, m%3
|
||||
psub%1 m%3, m%2
|
||||
SWAP %2, %4
|
||||
%endif
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
; SUMSUB_BADC size, a, b, c, d [, tmp]
; Two butterflies: (a, b) -> (a+b, b-a) and (c, d) -> (c+d, d-c).
;   6 args: delegates to two 4-argument SUMSUB_BA calls sharing tmp.
;   5 args: the scratch-free 3-instruction form of SUMSUB_BA, with the two
;           butterflies interleaved instruction by instruction.
%macro SUMSUB_BADC 5-6
|
||||
%if %0==6
|
||||
SUMSUB_BA %1, %2, %3, %6
|
||||
SUMSUB_BA %1, %4, %5, %6
|
||||
%else
|
||||
padd%1 m%2, m%3
|
||||
padd%1 m%4, m%5
|
||||
padd%1 m%3, m%3
|
||||
padd%1 m%5, m%5
|
||||
psub%1 m%3, m%2
|
||||
psub%1 m%5, m%4
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
; SUMSUB2_AB size, a, b, out
; On exit m%2 = 2*a + b and m%4 = a - 2*b (old values), per element.
; %3 may be a register number (first branch, uses the 3-operand psub form
; handled by x86inc.asm) or any other operand such as a memory reference
; (second branch, where it is used verbatim).
%macro SUMSUB2_AB 4
|
||||
%ifnum %3
|
||||
psub%1 m%4, m%2, m%3
|
||||
psub%1 m%4, m%3
|
||||
padd%1 m%2, m%2
|
||||
padd%1 m%2, m%3
|
||||
%else
|
||||
mova m%4, m%2
|
||||
padd%1 m%2, m%2
|
||||
padd%1 m%2, %3
|
||||
psub%1 m%4, %3
|
||||
psub%1 m%4, %3
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
; SUMSUB2_BA size, a, b, tmp
; On exit a = a + 2*b and b = b - 2*a (old values), per element.
;   SSE: tmp keeps the old a while a and b are updated in place.
;   AVX: tmp = a + b (3-operand), tmp += b, b -= a twice, then SWAP renames
;        tmp to a (SWAP is from x86inc.asm, not shown).
%macro SUMSUB2_BA 4
|
||||
%if avx_enabled == 0
|
||||
mova m%4, m%2
|
||||
padd%1 m%2, m%3
|
||||
padd%1 m%2, m%3
|
||||
psub%1 m%3, m%4
|
||||
psub%1 m%3, m%4
|
||||
%else
|
||||
padd%1 m%4, m%2, m%3
|
||||
padd%1 m%4, m%3
|
||||
psub%1 m%3, m%2
|
||||
psub%1 m%3, m%2
|
||||
SWAP %2, %4
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
; SUMSUBD2_AB size, a, b, tmp0, tmp1
; On exit %2 = (a >> 1) - b and %3 = (b >> 1) + a (old values), using
; arithmetic right shifts of element width %1.
;   %4 numeric: %4/%5 are scratch register numbers; results are built in
;               m%5 / m%4 and then SWAPped into the %2 / %3 names.
;   otherwise : %4/%5 are used verbatim as storage (e.g. memory operands)
;               for copies of the old b and a.
; In the inline comments, "%N:" names the final destination of the value.
%macro SUMSUBD2_AB 5
|
||||
%ifnum %4
|
||||
psra%1 m%5, m%2, 1 ; %2: %2>>1
|
||||
psra%1 m%4, m%3, 1 ; %3: %3>>1
|
||||
padd%1 m%4, m%2 ; %3: %3>>1+%2
|
||||
psub%1 m%5, m%3 ; %2: %2>>1-%3
|
||||
SWAP %2, %5
|
||||
SWAP %3, %4
|
||||
%else
|
||||
mova %5, m%2
|
||||
mova %4, m%3
|
||||
psra%1 m%3, 1 ; %3: %3>>1
|
||||
psra%1 m%2, 1 ; %2: %2>>1
|
||||
padd%1 m%3, %5 ; %3: %3>>1+%2
|
||||
psub%1 m%2, %4 ; %2: %2>>1-%3
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
; DCT4_1D r0, r1, r2, r3, tmp
; One 1-D pass of a 4-point word transform over registers %1..%4, built
; from SUMSUB_BADC, SUMSUB_BA and SUMSUB2_AB (presumably the H.264-style
; integer DCT, going by the name -- confirm against callers).
;   %5 numeric: %5 is a scratch register number.
;   otherwise : %5 is an address; m%2 is spilled to [%5] and reused as the
;               output register of SUMSUB2_AB.
; The final SWAP (x86inc.asm, not shown) renames registers so the outputs
; are addressed in order.
%macro DCT4_1D 5
|
||||
%ifnum %5
|
||||
SUMSUB_BADC w, %4, %1, %3, %2, %5
|
||||
SUMSUB_BA w, %3, %4, %5
|
||||
SUMSUB2_AB w, %1, %2, %5
|
||||
SWAP %1, %3, %4, %5, %2
|
||||
%else
|
||||
SUMSUB_BADC w, %4, %1, %3, %2
|
||||
SUMSUB_BA w, %3, %4
|
||||
mova [%5], m%2
|
||||
SUMSUB2_AB w, %1, [%5], %2
|
||||
SWAP %1, %3, %4, %2
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
; IDCT4_1D size, r0, r1, r2, r3, tmp0 [, tmp1]
; One 1-D pass of a 4-point inverse transform over registers %2..%5 with
; element width %1.
;   %6 numeric: %6 and %7 are scratch register numbers.
;   otherwise : %6 is a memory base; [%6] and [%6+16] (or [%6+32] when the
;               size is not w) serve as the two spill slots of
;               SUMSUBD2_AB, and %7 is not used.
; The trailing SWAP (x86inc.asm, not shown) renames registers to produce
; the row order listed at the end.
%macro IDCT4_1D 6-7
|
||||
%ifnum %6
|
||||
SUMSUBD2_AB %1, %3, %5, %7, %6
|
||||
; %3: %3>>1-%5 %5: %3+%5>>1
|
||||
SUMSUB_BA %1, %4, %2, %7
|
||||
; %4: %2+%4 %2: %2-%4
|
||||
SUMSUB_BADC %1, %5, %4, %3, %2, %7
|
||||
; %5: %2+%4 + (%3+%5>>1)
|
||||
; %4: %2+%4 - (%3+%5>>1)
|
||||
; %3: %2-%4 + (%3>>1-%5)
|
||||
; %2: %2-%4 - (%3>>1-%5)
|
||||
%else
|
||||
%ifidn %1, w
|
||||
SUMSUBD2_AB %1, %3, %5, [%6], [%6+16]
|
||||
%else
|
||||
SUMSUBD2_AB %1, %3, %5, [%6], [%6+32]
|
||||
%endif
|
||||
SUMSUB_BA %1, %4, %2
|
||||
SUMSUB_BADC %1, %5, %4, %3, %2
|
||||
%endif
|
||||
SWAP %2, %5, %4
|
||||
; %2: %2+%4 + (%3+%5>>1) row0
|
||||
; %3: %2-%4 + (%3>>1-%5) row1
|
||||
; %4: %2-%4 - (%3>>1-%5) row2
|
||||
; %5: %2+%4 - (%3+%5>>1) row3
|
||||
%endmacro
|
||||
|
||||
|
||||
; LOAD_DIFF dst, tmp, zero|none, src1, src2
; Load bytes from src1 and src2 and leave their per-element difference
; (src1 - src2) as 16-bit words in dst.  movh is an x86inc.asm helper (not
; shown), presumably a half-register load.
;   %3 == none: no zero register is needed.  dst words = (s2<<8 | s1) and
;               tmp words = (s2<<8 | s2); subtracting cancels the high
;               bytes and leaves s1 - s2.
;   otherwise : %3 is a zeroed register used to widen both inputs to words
;               before the subtraction.
%macro LOAD_DIFF 5
|
||||
%ifidn %3, none
|
||||
movh %1, %4
|
||||
movh %2, %5
|
||||
punpcklbw %1, %2
|
||||
punpcklbw %2, %2
|
||||
psubw %1, %2
|
||||
%else
|
||||
movh %1, %4
|
||||
punpcklbw %1, %3
|
||||
movh %2, %5
|
||||
punpcklbw %2, %3
|
||||
psubw %1, %2
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
; STORE_DCT r0, r1, r2, r3, base, offset
; Store four 128-bit registers to [base+offset] as two groups of 8-byte
; rows: the low halves of m%1..m%4 go to +0, +8, +16, +24 (movq) and the
; high halves go to +32, +40, +48, +56 (movhps).
%macro STORE_DCT 6
|
||||
movq [%5+%6+ 0], m%1
|
||||
movq [%5+%6+ 8], m%2
|
||||
movq [%5+%6+16], m%3
|
||||
movq [%5+%6+24], m%4
|
||||
movhps [%5+%6+32], m%1
|
||||
movhps [%5+%6+40], m%2
|
||||
movhps [%5+%6+48], m%3
|
||||
movhps [%5+%6+56], m%4
|
||||
%endmacro
|
||||
|
||||
; Load four rows of byte differences into m%1..m%4 with LOAD_DIFF.
;   %5, %6 = scratch register numbers, %7 = zero register number (or the
;            token LOAD_DIFF treats as "none" once prefixed -- confirm),
;   %8, %9 = row pointers (default r0 and r2),
;   %10    = non-zero to advance both pointers by four rows afterwards
;            (default 0).
; The row addressing hard-codes r1/r3 as the two strides and r4/r5 as the
; offsets of the fourth row (presumably 3*stride, set up by the caller --
; confirm).
%macro LOAD_DIFF_8x4P 7-10 r0,r2,0 ; 4x dest, 2x temp, 2x pointer, increment?
|
||||
LOAD_DIFF m%1, m%5, m%7, [%8], [%9]
|
||||
LOAD_DIFF m%2, m%6, m%7, [%8+r1], [%9+r3]
|
||||
LOAD_DIFF m%3, m%5, m%7, [%8+2*r1], [%9+2*r3]
|
||||
LOAD_DIFF m%4, m%6, m%7, [%8+r4], [%9+r5]
|
||||
%if %10
|
||||
lea %8, [%8+4*r1]
|
||||
lea %9, [%9+4*r3]
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
; DIFFx2 res0, res1, tmp, zero, mem0, mem1 [, unused]
; For each of two rows: load bytes from memN, widen them to words with the
; zero register, arithmetic-shift the residual resN right by 6 and add the
; pixels with signed saturation.  The two word rows are then packed to
; unsigned bytes into res1 (res1's row in the low half, res0's row in the
; high half).  Nothing is stored to memory here, and the optional 7th
; argument is not referenced.
%macro DIFFx2 6-7
|
||||
movh %3, %5
|
||||
punpcklbw %3, %4
|
||||
psraw %1, 6
|
||||
paddsw %1, %3
|
||||
movh %3, %6
|
||||
punpcklbw %3, %4
|
||||
psraw %2, 6
|
||||
paddsw %2, %3
|
||||
packuswb %2, %1
|
||||
%endmacro
|
||||
|
||||
; STORE_DIFF res, tmp, zero, mem
; Add a residual row to the bytes at mem and write the result back:
; load mem and widen it to words with the zero register, arithmetic-shift
; res right by 6, add with signed saturation, pack to unsigned bytes and
; store to mem.  res and tmp are overwritten.
%macro STORE_DIFF 4
|
||||
movh %2, %4
|
||||
punpcklbw %2, %3
|
||||
psraw %1, 6
|
||||
paddsw %1, %2
|
||||
packuswb %1, %1
|
||||
movh %4, %1
|
||||
%endmacro
|
||||
|
||||
; Add two residual rows to the bytes at [source] and [source+stride] and
; write both rows back.  Each residual (add1/add2) is arithmetic-shifted
; right by `shift`, the loaded bytes are widened to words with `zero`,
; added (wrapping paddw), packed to unsigned bytes and stored.  reg1/reg2
; are scratch; add1/add2 are overwritten by the shift.
%macro STORE_DIFFx2 8 ; add1, add2, reg1, reg2, zero, shift, source, stride
|
||||
movh %3, [%7]
|
||||
movh %4, [%7+%8]
|
||||
psraw %1, %6
|
||||
psraw %2, %6
|
||||
punpcklbw %3, %5
|
||||
punpcklbw %4, %5
|
||||
paddw %3, %1
|
||||
paddw %4, %2
|
||||
packuswb %3, %5
|
||||
packuswb %4, %5
|
||||
movh [%7], %3
|
||||
movh [%7+%8], %4
|
||||
%endmacro
|
||||
|
||||
; Per-byte unsigned minimum of dst and src, result in dst.
;   mmxext: native pminub; the third argument is not used.
;   else  : tmp = saturating (dst - src), i.e. max(dst - src, 0), then
;           dst -= tmp, which yields min(dst, src).
%macro PMINUB 3 ; dst, src, ignored
|
||||
%if cpuflag(mmxext)
|
||||
pminub %1, %2
|
||||
%else ; dst, src, tmp
|
||||
mova %3, %1
|
||||
psubusb %3, %2
|
||||
psubb %1, %3
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
; SPLATW dst, src [, index]
; Broadcast word number `index` (0..3, default 0) of src to every word of
; dst.
;   mmsize 16: pshuflw with selector index*0x55 (the index repeated in all
;              four 2-bit fields) fills the low four words, then
;              punpcklqdq copies them to the high half.
;   mmxext   : a single pshufw with the same selector.
;   else     : copy src if needed, then two self-unpacks; bit 1 of the
;              index picks punpckhwd/punpcklwd for the first step and bit 0
;              for the second.
%macro SPLATW 2-3 0
|
||||
%if mmsize == 16
|
||||
pshuflw %1, %2, (%3)*0x55
|
||||
punpcklqdq %1, %1
|
||||
%elif cpuflag(mmxext)
|
||||
pshufw %1, %2, (%3)*0x55
|
||||
%else
|
||||
%ifnidn %1, %2
|
||||
mova %1, %2
|
||||
%endif
|
||||
%if %3 & 2
|
||||
punpckhwd %1, %1
|
||||
%else
|
||||
punpcklwd %1, %1
|
||||
%endif
|
||||
%if %3 & 1
|
||||
punpckhwd %1, %1
|
||||
%else
|
||||
punpcklwd %1, %1
|
||||
%endif
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
; SPLATD reg
; Broadcast the low dword of reg to all of its dwords, in place.
;   mmsize 8: punpckldq reg, reg.
;   sse2    : pshufd with selector 0.
;   sse     : shufps with selector 0.
; If mmsize is not 8 and neither sse2 nor sse is flagged, nothing is
; emitted.
%macro SPLATD 1
|
||||
%if mmsize == 8
|
||||
punpckldq %1, %1
|
||||
%elif cpuflag(sse2)
|
||||
pshufd %1, %1, 0
|
||||
%elif cpuflag(sse)
|
||||
shufps %1, %1, 0
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
; Clamp each signed word of dst to [min, max]: dst = max(dst, min) followed
; by dst = min(dst, max).
%macro CLIPW 3 ;(dst, min, max)
|
||||
pmaxsw %1, %2
|
||||
pminsw %1, %3
|
||||
%endmacro
|
||||
|
||||
; Per-dword signed minimum without pminsd.
; tmp = mask where src > dst; then dst = ((dst ^ src) & tmp) ^ src, which
; keeps dst where the mask is set and takes src elsewhere.
%macro PMINSD_MMX 3 ; dst, src, tmp
|
||||
mova %3, %2
|
||||
pcmpgtd %3, %1
|
||||
pxor %1, %2
|
||||
pand %1, %3
|
||||
pxor %1, %2
|
||||
%endmacro
|
||||
|
||||
; Per-dword signed maximum without pmaxsd.
; tmp = mask where dst > src; then dst = (dst & tmp) | (src & ~tmp).
%macro PMAXSD_MMX 3 ; dst, src, tmp
|
||||
mova %3, %1
|
||||
pcmpgtd %3, %2
|
||||
pand %1, %3
|
||||
pandn %3, %2
|
||||
por %1, %3
|
||||
%endmacro
|
||||
|
||||
; Clamp each signed dword of src/dst to [min, max] using the emulated
; PMINSD_MMX / PMAXSD_MMX: first limit to max, then to min.  The macro
; accepts 3 or 4 arguments, but tmp (%4) is forwarded to both helpers, so
; it is effectively required.
%macro CLIPD_MMX 3-4 ; src/dst, min, max, tmp
|
||||
PMINSD_MMX %1, %3, %4
|
||||
PMAXSD_MMX %1, %2, %4
|
||||
%endmacro
|
||||
|
||||
; Clamp each signed dword of src/dst by round-tripping through float:
; convert to single precision, apply minps with max and maxps with min
; (both bounds must therefore be float vectors), convert back to dwords.
; NOTE(review): single precision has a 24-bit significand, so inputs of
; larger magnitude are rounded by the conversion -- confirm callers'
; value ranges.
%macro CLIPD_SSE2 3-4 ; src/dst, min (float), max (float), unused
|
||||
cvtdq2ps %1, %1
|
||||
minps %1, %3
|
||||
maxps %1, %2
|
||||
cvtps2dq %1, %1
|
||||
%endmacro
|
||||
|
||||
; Clamp each signed dword of src/dst to [min, max] with the native SSE4.1
; pminsd / pmaxsd instructions.
%macro CLIPD_SSE41 3-4 ; src/dst, min, max, unused
|
||||
pminsd %1, %3
|
||||
pmaxsd %1, %2
|
||||
%endmacro
|
||||
|
||||
; Broadcast one 32-bit float from memory to every lane of dst.
;   avx: native vbroadcastss.
;   sse: movss loads the scalar into lane 0, shufps with selector 0 copies
;        lane 0 to all four lanes.
%macro VBROADCASTSS 2 ; dst xmm/ymm, src m32
|
||||
%if cpuflag(avx)
|
||||
vbroadcastss %1, %2
|
||||
%else ; sse
|
||||
movss %1, %2
|
||||
shufps %1, %1, 0
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
; Broadcast one 64-bit double from memory to every lane of dst.
;   avx with 32-byte registers: native vbroadcastsd.
;   sse3: movddup loads and duplicates in one instruction.
;   sse2: movsd loads the scalar into the low half, movlhps copies the low
;         half to the high half.
%macro VBROADCASTSD 2 ; dst xmm/ymm, src m64
|
||||
%if cpuflag(avx) && mmsize == 32
|
||||
vbroadcastsd %1, %2
|
||||
%elif cpuflag(sse3)
|
||||
movddup %1, %2
|
||||
%else ; sse2
|
||||
movsd %1, %2
|
||||
movlhps %1, %1
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
; SHUFFLE_MASK_W w0, w1, ..., w7
; Emit 16 bytes of data forming a byte-shuffle mask from eight word
; indices.  For each argument (visited in order via %rotate):
;   - a value >= 0x80 is emitted twice as-is (pshufb zeroes a destination
;     byte whose selector has bit 7 set);
;   - otherwise the two byte selectors 2*w and 2*w+1 are emitted, i.e. the
;     low and high byte of source word w.
%macro SHUFFLE_MASK_W 8
|
||||
%rep 8
|
||||
%if %1>=0x80
|
||||
db %1, %1
|
||||
%else
|
||||
db %1*2
|
||||
db %1*2+1
|
||||
%endif
|
||||
%rotate 1
|
||||
%endrep
|
||||
%endmacro
|
||||
|
||||
; Sign-extend the low words of src to dwords in dst.
;   sse4: native pmovsxwd.
;   else: copy src to dst if they differ, punpcklwd dst with itself so each
;         low word occupies both halves of a dword, then arithmetic-shift
;         each dword right by 16 to leave the sign-extended word.
%macro PMOVSXWD 2; dst, src
|
||||
%if cpuflag(sse4)
|
||||
pmovsxwd %1, %2
|
||||
%else
|
||||
%ifnidn %1, %2
|
||||
mova %1, %2
|
||||
%endif
|
||||
punpcklwd %1, %1
|
||||
psrad %1, 16
|
||||
%endif
|
||||
%endmacro
|
||||
Reference in New Issue
Block a user