Ticket #3649: patch3dnow.diff

File patch3dnow.diff, 10.6 KB (added by Carl Eugen Hoyos, 12 years ago)
  • libavcodec/x86/ac3dsp.asm

    diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
    index b244416..47e85b3 100644
    a b AC3_SHIFT r, 32, psrad  
    214214
    215215; The 3DNow! version is not bit-identical because pf2id uses truncation rather
    216216; than round-to-nearest.
     217%if HAVE_AMD3DNOW_EXTERNAL
    217218INIT_MMX 3dnow
    218219cglobal float_to_fixed24, 3, 3, 0, dst, src, len
    219220    movq   m0, [pf_1_24]
    cglobal float_to_fixed24, 3, 3, 0, dst, src, len  
    240241    ja .loop
    241242    femms
    242243    RET
     244%endif
    243245
    244246INIT_XMM sse
    245247cglobal float_to_fixed24, 3, 3, 3, dst, src, len
  • libavcodec/x86/fft.asm

    diff --git a/libavcodec/x86/fft.asm b/libavcodec/x86/fft.asm
    index cae404c..864c4c1 100644
    a b fft8 %+ SUFFIX:  
    499499%endmacro
    500500
    501501%if ARCH_X86_32
     502%if HAVE_AMD3DNOWEXT_EXTERNAL
    502503INIT_MMX 3dnowext
    503504FFT48_3DNOW
     505%endif
    504506
     507%if HAVE_AMD3DNOW_EXTERNAL
    505508INIT_MMX 3dnow
    506509FFT48_3DNOW
    507510%endif
     511%endif
    508512
    509513%define Z(x) [zcq + o1q*(x&6) + mmsize*(x&1)]
    510514%define Z2(x) [zcq + o3q + mmsize*(x&1)]
    cglobal fft_calc, 2,5,8  
    618622%endmacro
    619623
    620624%if ARCH_X86_32
     625%if HAVE_AMD3DNOW_EXTERNAL
    621626INIT_MMX 3dnow
    622627FFT_CALC_FUNC
     628%endif
     629%if HAVE_AMD3DNOWEXT_EXTERNAL
    623630INIT_MMX 3dnowext
    624631FFT_CALC_FUNC
    625632%endif
     633%endif
    626634INIT_XMM sse
    627635FFT_CALC_FUNC
    628636
    cglobal imdct_calc, 3,5,3  
    712720%endmacro
    713721
    714722%if ARCH_X86_32
     723%if HAVE_AMD3DNOW_EXTERNAL
    715724INIT_MMX 3dnow
    716725IMDCT_CALC_FUNC
     726%endif
     727%if HAVE_AMD3DNOWEXT_EXTERNAL
    717728INIT_MMX 3dnowext
    718729IMDCT_CALC_FUNC
    719730%endif
     731%endif
    720732
    721733INIT_XMM sse
    722734IMDCT_CALC_FUNC
    723735
    724 %if ARCH_X86_32
     736%if ARCH_X86_32 && HAVE_AMD3DNOWEXT_EXTERNAL
    725737INIT_MMX 3dnow
    726738%define mulps pfmul
    727739%define addps pfadd
    INIT_XMM sse  
    789801DECL_FFT 5
    790802DECL_FFT 5, _interleave
    791803%if ARCH_X86_32
     804%if HAVE_AMD3DNOW_EXTERNAL
    792805INIT_MMX 3dnow
    793806DECL_FFT 4
    794807DECL_FFT 4, _interleave
     808%endif
     809%if HAVE_AMD3DNOWEXT_EXTERNAL
    795810INIT_MMX 3dnowext
    796811DECL_FFT 4
    797812DECL_FFT 4, _interleave
    798813%endif
     814%endif
    799815
    800816INIT_XMM sse
    801817%undef mulps
    cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i  
    10781094DECL_IMDCT POSROTATESHUF
    10791095
    10801096%if ARCH_X86_32
     1097%if HAVE_AMD3DNOW_EXTERNAL
    10811098INIT_MMX 3dnow
    10821099DECL_IMDCT POSROTATESHUF_3DNOW
     1100%endif
    10831101
     1102%if HAVE_AMD3DNOWEXT_EXTERNAL
    10841103INIT_MMX 3dnowext
    10851104DECL_IMDCT POSROTATESHUF_3DNOW
    10861105%endif
     1106%endif
    10871107
    10881108INIT_YMM avx
    10891109
  • libavcodec/x86/fmtconvert.asm

    diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm
    index 0d3f821..4891b9d 100644
    a b INIT_XMM sse2  
    115115FLOAT_TO_INT16 2
    116116INIT_MMX sse
    117117FLOAT_TO_INT16 0
     118%if HAVE_AMD3DNOW_EXTERNAL
    118119INIT_MMX 3dnow
    119120FLOAT_TO_INT16 0
     121%endif
    120122
    121123;------------------------------------------------------------------------------
    122124; void ff_float_to_int16_step(int16_t *dst, const float *src, long len, long step);
    INIT_XMM sse2  
    193195FLOAT_TO_INT16_STEP 2
    194196INIT_MMX sse
    195197FLOAT_TO_INT16_STEP 0
     198%if HAVE_AMD3DNOW_EXTERNAL
    196199INIT_MMX 3dnow
    197200FLOAT_TO_INT16_STEP 0
     201%endif
    198202
    199203;-------------------------------------------------------------------------------
    200204; void ff_float_to_int16_interleave2(int16_t *dst, const float **src, long len);
    cglobal float_to_int16_interleave2, 3, 4, 2, dst, src0, src1, len  
    237241    REP_RET
    238242%endmacro
    239243
     244%if HAVE_AMD3DNOW_EXTERNAL
    240245INIT_MMX 3dnow
    241246FLOAT_TO_INT16_INTERLEAVE2
     247%endif
    242248INIT_MMX sse
    243249FLOAT_TO_INT16_INTERLEAVE2
    244250INIT_XMM sse2
    cglobal float_to_int16_interleave6, 2, 8, 0, dst, src, src1, src2, src3, src4, s  
    296302
    297303INIT_MMX sse
    298304FLOAT_TO_INT16_INTERLEAVE6
     305%if HAVE_AMD3DNOW_EXTERNAL
    299306INIT_MMX 3dnow
    300307FLOAT_TO_INT16_INTERLEAVE6
     308%endif
     309%if HAVE_AMD3DNOWEXT_EXTERNAL
    301310INIT_MMX 3dnowext
    302311FLOAT_TO_INT16_INTERLEAVE6
     312%endif
    303313
    304314;-----------------------------------------------------------------------------
    305315; void ff_float_interleave6(float *dst, const float **src, unsigned int len);
  • libavcodec/x86/h264_chromamc.asm

    diff --git a/libavcodec/x86/h264_chromamc.asm b/libavcodec/x86/h264_chromamc.asm
    index 107ae51..f45e455 100644
    a b chroma_mc4_mmx_func avg, h264  
    456456chroma_mc4_mmx_func avg, rv40
    457457chroma_mc2_mmx_func avg, h264
    458458
     459%if HAVE_AMD3DNOW_EXTERNAL
    459460INIT_MMX 3dnow
    460461chroma_mc8_mmx_func avg, h264, _rnd
    461462chroma_mc8_mmx_func avg, vc1,  _nornd
    462463chroma_mc8_mmx_func avg, rv40
    463464chroma_mc4_mmx_func avg, h264
    464465chroma_mc4_mmx_func avg, rv40
     466%endif
    465467
    466468%macro chroma_mc8_ssse3_func 2-3
    467469cglobal %1_%2_chroma_mc8%3, 6, 7, 8
  • libavcodec/x86/hpeldsp.asm

    diff --git a/libavcodec/x86/hpeldsp.asm b/libavcodec/x86/hpeldsp.asm
    index a702b8b..1c69281 100644
    a b cglobal put_pixels8_x2, 4,5  
    8282
    8383INIT_MMX mmxext
    8484PUT_PIXELS8_X2
     85%if HAVE_AMD3DNOW_EXTERNAL
    8586INIT_MMX 3dnow
    8687PUT_PIXELS8_X2
     88%endif
    8789
    8890
    8991; void ff_put_pixels16_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
    cglobal put_pixels16_x2, 4,5  
    126128
    127129INIT_MMX mmxext
    128130PUT_PIXELS_16
     131%if HAVE_AMD3DNOW_EXTERNAL
    129132INIT_MMX 3dnow
    130133PUT_PIXELS_16
     134%endif
    131135; The 8_X2 macro can easily be used here
    132136INIT_XMM sse2
    133137PUT_PIXELS8_X2
    cglobal put_no_rnd_pixels8_x2, 4,5  
    170174
    171175INIT_MMX mmxext
    172176PUT_NO_RND_PIXELS8_X2
     177%if HAVE_AMD3DNOW_EXTERNAL
    173178INIT_MMX 3dnow
    174179PUT_NO_RND_PIXELS8_X2
     180%endif
    175181
    176182
    177183; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
    cglobal put_no_rnd_pixels8_x2_exact, 4,5  
    217223
    218224INIT_MMX mmxext
    219225PUT_NO_RND_PIXELS8_X2_EXACT
     226%if HAVE_AMD3DNOW_EXTERNAL
    220227INIT_MMX 3dnow
    221228PUT_NO_RND_PIXELS8_X2_EXACT
     229%endif
    222230
    223231
    224232; void ff_put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
    cglobal put_pixels8_y2, 4,5  
    255263
    256264INIT_MMX mmxext
    257265PUT_PIXELS8_Y2
     266%if HAVE_AMD3DNOW_EXTERNAL
    258267INIT_MMX 3dnow
    259268PUT_PIXELS8_Y2
     269%endif
    260270; actually, put_pixels16_y2_sse2
    261271INIT_XMM sse2
    262272PUT_PIXELS8_Y2
    cglobal put_no_rnd_pixels8_y2, 4,5  
    295305
    296306INIT_MMX mmxext
    297307PUT_NO_RND_PIXELS8_Y2
     308%if HAVE_AMD3DNOW_EXTERNAL
    298309INIT_MMX 3dnow
    299310PUT_NO_RND_PIXELS8_Y2
     311%endif
    300312
    301313
    302314; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
    cglobal put_no_rnd_pixels8_y2_exact, 4,5  
    337349
    338350INIT_MMX mmxext
    339351PUT_NO_RND_PIXELS8_Y2_EXACT
     352%if HAVE_AMD3DNOW_EXTERNAL
    340353INIT_MMX 3dnow
    341354PUT_NO_RND_PIXELS8_Y2_EXACT
     355%endif
    342356
    343357
    344358; void ff_avg_pixels8(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
    cglobal avg_pixels8, 4,5  
    367381    REP_RET
    368382%endmacro
    369383
     384%if HAVE_AMD3DNOW_EXTERNAL
    370385INIT_MMX 3dnow
    371386AVG_PIXELS8
     387%endif
    372388
    373389
    374390; void ff_avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
    INIT_MMX mmx  
    427443AVG_PIXELS8_X2
    428444INIT_MMX mmxext
    429445AVG_PIXELS8_X2
     446%if HAVE_AMD3DNOW_EXTERNAL
    430447INIT_MMX 3dnow
    431448AVG_PIXELS8_X2
     449%endif
    432450; actually avg_pixels16_x2
    433451INIT_XMM sse2
    434452AVG_PIXELS8_X2
    cglobal avg_pixels8_y2, 4,5  
    472490
    473491INIT_MMX mmxext
    474492AVG_PIXELS8_Y2
     493%if HAVE_AMD3DNOW_EXTERNAL
    475494INIT_MMX 3dnow
    476495AVG_PIXELS8_Y2
     496%endif
    477497; actually avg_pixels16_y2
    478498INIT_XMM sse2
    479499AVG_PIXELS8_Y2
    cglobal avg_approx_pixels8_xy2, 4,5  
    521541
    522542INIT_MMX mmxext
    523543AVG_APPROX_PIXELS8_XY2
     544%if HAVE_AMD3DNOW_EXTERNAL
    524545INIT_MMX 3dnow
    525546AVG_APPROX_PIXELS8_XY2
     547%endif
    526548
    527549
    528550; void ff_avg_pixels16_xy2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
    cglobal %1_pixels8_xy2, 4,5  
    605627
    606628INIT_MMX mmxext
    607629SET_PIXELS_XY2 avg
     630%if HAVE_AMD3DNOW_EXTERNAL
    608631INIT_MMX 3dnow
    609632SET_PIXELS_XY2 avg
     633%endif
    610634INIT_XMM sse2
    611635SET_PIXELS_XY2 put
    612636SET_PIXELS_XY2 avg
  • libavcodec/x86/qpeldsp.asm

    diff --git a/libavcodec/x86/qpeldsp.asm b/libavcodec/x86/qpeldsp.asm
    index dc0f900..8e0f2dc 100644
    a b cglobal put_no_rnd_pixels16_l2, 6,6  
    166166
    167167INIT_MMX mmxext
    168168PUT_NO_RND_PIXELS16_l2
     169%if HAVE_AMD3DNOW_EXTERNAL
    169170INIT_MMX 3dnow
    170171PUT_NO_RND_PIXELS16_l2
     172%endif
    171173
    172174%macro MPEG4_QPEL16_H_LOWPASS 1
    173175cglobal %1_mpeg4_qpel16_h_lowpass, 5, 5, 0, 16
  • libavcodec/x86/rv40dsp.asm

    diff --git a/libavcodec/x86/rv40dsp.asm b/libavcodec/x86/rv40dsp.asm
    index 24a8038..787f401 100644
    a b INIT_MMX mmxext  
    240240FILTER_V  avg
    241241FILTER_H  avg
    242242
     243%if HAVE_AMD3DNOW_EXTERNAL
    243244INIT_MMX  3dnow
    244245FILTER_V  avg
    245246FILTER_H  avg
    246247%endif
     248%endif
    247249
    248250INIT_XMM  sse2
    249251FILTER_H  put
  • libavcodec/x86/videodsp.asm

    diff --git a/libavcodec/x86/videodsp.asm b/libavcodec/x86/videodsp.asm
    index 1ac0257..4c2252e 100644
    a b cglobal prefetch, 3, 3, 0, buf, stride, h  
    438438
    439439INIT_MMX mmxext
    440440PREFETCH_FN prefetcht0
    441 %if ARCH_X86_32
     441%if ARCH_X86_32 && HAVE_AMD3DNOW_EXTERNAL
    442442INIT_MMX 3dnow
    443443PREFETCH_FN prefetch
    444444%endif
  • libavcodec/x86/vorbisdsp.asm

    diff --git a/libavcodec/x86/vorbisdsp.asm b/libavcodec/x86/vorbisdsp.asm
    index b25d838..8bbd4a1 100644
    a b pdw_80000000: times 4 dd 0x80000000  
    2727
    2828SECTION .text
    2929
    30 %if ARCH_X86_32
     30%if ARCH_X86_32 && HAVE_AMD3DNOW_EXTERNAL
    3131INIT_MMX 3dnow
    3232cglobal vorbis_inverse_coupling, 3, 3, 6, mag, ang, block_size
    3333    pxor                     m7, m7
  • libavutil/x86/float_dsp.asm

    diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm
    index ec3d22b..71ccaa6 100644
    a b cglobal vector_fmul_window, 5, 6, 6, dst, src0, src1, win, len, len1  
    244244    REP_RET
    245245%endmacro
    246246
     247%if HAVE_AMD3DNOWEXT_EXTERNAL
    247248INIT_MMX 3dnowext
    248249VECTOR_FMUL_WINDOW
     250%endif
    249251INIT_XMM sse
    250252VECTOR_FMUL_WINDOW
    251253
  • libpostproc/postprocess.c

    diff --git a/libpostproc/postprocess.c b/libpostproc/postprocess.c
    index da586ff..99f1dfb 100644
    a b static av_always_inline void do_a_deblock_C(uint8_t *src, int step,  
    554554#        include "postprocess_template.c"
    555555#        define TEMPLATE_PP_MMXEXT 1
    556556#        include "postprocess_template.c"
     557#        if HAVE_AMD3DNOW_INLINE
    557558#        define TEMPLATE_PP_3DNOW 1
    558559#        include "postprocess_template.c"
     560#        endif
    559561#        define TEMPLATE_PP_SSE2 1
    560562#        include "postprocess_template.c"
    561563#    else
    static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[]  
    592594        // ordered per speed fastest first
    593595        if      (c->cpuCaps & AV_CPU_FLAG_SSE2)     pp = postProcess_SSE2;
    594596        else if (c->cpuCaps & AV_CPU_FLAG_MMXEXT)   pp = postProcess_MMX2;
     597#if HAVE_AMD3DNOW_INLINE
    595598        else if (c->cpuCaps & AV_CPU_FLAG_3DNOW)    pp = postProcess_3DNow;
     599#endif
    596600        else if (c->cpuCaps & AV_CPU_FLAG_MMX)      pp = postProcess_MMX;
    597601#elif HAVE_ALTIVEC
    598602        if      (c->cpuCaps & AV_CPU_FLAG_ALTIVEC)  pp = postProcess_altivec;