Changes between Initial Version and Version 1 of Ticket #1844, comment 4


Ignore:
Timestamp:
Oct 25, 2012, 6:44:20 PM (4 years ago)
Author:
thegeek
Comment:

Legend:

Unmodified
Added
Removed
Modified
  • Ticket #1844, comment 4

    initial v1  
    11Replying to [comment:3 cehoyos]: 
    22> Please add the missing parts of the gdb session (disassembly, register dump), see https://ffmpeg.org/bugreports.html - consider using a static build (although it does not really matter). 
    3 Ok. 
     3Ok, this is the static build with slightly different build settings, which seems to produce the crash in the sse2 version of the function: 
     4{{{ 
     5  configuration: --cross-prefix=/home/swingcatalyst/mxe/build-ffmpeg/../../mxe/mxe-multitarget-static/usr/bin/x86_64-static-mingw32- --enable-cross-compile --arch=x86_64 --target-os=mingw32 --prefix=/home/swingcatalyst/mxe/build-ffmpeg/install/ffmpeg-1.0-x86_64-static-install --disable-shared --disable-postproc --disable-pthreads --enable-runtime-cpudetect --enable-bzlib --enable-libfreetype --enable-libmp3lame --enable-libspeex --enable-libtheora --enable-libvorbis --enable-libvpx --enable-zlib --disable-stripping 
     6}}} 
     7{{{ 
     8(gdb) bt 
     9#0  0x0000000000add1ec in ff_mix_2_1_a_int16_sse2 () 
     10#1  0x00000000085ef0a0 in ?? () 
     11#2  0x0000000000000000 in ?? () 
     12(gdb) disass $pc-32,$pc+32 
     13Dump of assembler code from 0xadd1cc to 0xadd20c: 
     14   0x0000000000add1cc <ff_mix_2_1_a_int16_sse2+44>:     test   $0xf,%r8 
     15   0x0000000000add1d3 <ff_mix_2_1_a_int16_sse2+51>:     jne    0xadd0bf <mix_2_1_int16_u_int_sse2> 
     16   0x0000000000add1d9 <ff_mix_2_1_a_int16_sse2+57>:     test   $0xf,%rcx 
     17   0x0000000000add1e0 <ff_mix_2_1_a_int16_sse2+64>:     jne    0xadd0bf <mix_2_1_int16_u_int_sse2> 
     18   0x0000000000add1e6 <ff_mix_2_1_a_int16_sse2+70>:     movd   (%r9,%r10,4),%xmm4 
     19=> 0x0000000000add1ec <ff_mix_2_1_a_int16_sse2+76>:     movd   (%r9,%r11,4),%xmm6 
     20   0x0000000000add1f2 <ff_mix_2_1_a_int16_sse2+82>:     pshuflw $0x0,%xmm4,%xmm5 
     21   0x0000000000add1f7 <ff_mix_2_1_a_int16_sse2+87>:     punpcklqdq %xmm5,%xmm5 
     22   0x0000000000add1fb <ff_mix_2_1_a_int16_sse2+91>:     pshuflw $0x0,%xmm6,%xmm6 
     23   0x0000000000add200 <ff_mix_2_1_a_int16_sse2+96>:     punpcklqdq %xmm6,%xmm6 
     24   0x0000000000add204 <ff_mix_2_1_a_int16_sse2+100>:    psllq  $0x20,%xmm4 
     25   0x0000000000add209 <ff_mix_2_1_a_int16_sse2+105>:    psrlq  $0x30,%xmm4 
     26End of assembler dump. 
     27(gdb) info all-registers 
     28rax            0x2000   8192 
     29rbx            0x5b267a0        95578016 
     30rcx            0xc2dc0a0        204325024 
     31rdx            0xc2bbfe0        204193760 
     32rsi            0x5b26970        95578480 
     33rdi            0x5b267a0        95578016 
     34rbp            0x0      0x0 
     35rsp            0x23dc98 0x23dc98 
     36r8             0xc2c3fe0        204226528 
     37r9             0x7604f80        123752320 
     38r10            0x0      0 
     39r11            0xea6400000001   257715217629185 
     40r12            0x23de10 2350608 
     41r13            0x1      1 
     42r14            0x0      0 
     43r15            0x1      1 
     44rip            0xadd1ec 0xadd1ec <ff_mix_2_1_a_int16_sse2+76> 
     45eflags         0x10246  [ PF ZF IF RF ] 
     46cs             0x33     51 
     47ss             0x2b     43 
     48ds             0x0      0 
     49es             0x0      0 
     50fs             0x0      0 
     51gs             0x0      0 
     52st0            -nan(0xbebebebebebebebe) (raw 0xffffbebebebebebebebe) 
     53st1            -nan(0xbebebebebebebebe) (raw 0xffffbebebebebebebebe) 
     54st2            -nan(0x31b7323331362f33) (raw 0xffff31b7323331362f33) 
     55st3            -nan(0x3100b700320033)   (raw 0xffff003100b700320033) 
     56st4            9        (raw 0x40029000000000000000) 
     57st5            1        (raw 0x3fff8000000000000000) 
     58st6            2818.3829312644548       (raw 0x400ab026207c88973351) 
     59st7            3.4500000000000002       (raw 0x4000dcccccccccccd000) 
     60fctrl          0x420037f        69206911 
     61fstat          0x420    1056 
     62ftag           0x0      0 
     63fiseg          0x33     51 
     64fioff          0xd252a0 13783712 
     65foseg          0x2b     43 
     66fooff          0x23e0a0 2351264 
     67fop            0x0      0 
     68xmm0           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x8000000000000000, 0x8000000000000000}, v16_int8 = {0xea, 0xff, 0xd5, 0xff, 0xd3, 0xff, 0xdd, 0xff, 0xd5, 0xff, 
     69    0xce, 0xff, 0xd8, 0xff, 0xdf, 0xff}, v8_int16 = {0xffea, 0xffd5, 0xffd3, 0xffdd, 0xffd5, 0xffce, 0xffd8, 0xffdf}, v4_int32 = {0xffd5ffea, 0xffddffd3, 0xffceffd5, 
     70    0xffdfffd8}, v2_int64 = {0xffddffd3ffd5ffea, 0xffdfffd8ffceffd5}, uint128 = 0xffdfffd8ffceffd5ffddffd3ffd5ffea} 
     71xmm1           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x8000000000000000, 0x8000000000000000}, v16_int8 = {0xea, 0xff, 0xd5, 0xff, 0xd3, 0xff, 0xdd, 0xff, 0xd5, 0xff, 
     72    0xce, 0xff, 0xd8, 0xff, 0xdf, 0xff}, v8_int16 = {0xffea, 0xffd5, 0xffd3, 0xffdd, 0xffd5, 0xffce, 0xffd8, 0xffdf}, v4_int32 = {0xffd5ffea, 0xffddffd3, 0xffceffd5, 
     73    0xffdfffd8}, v2_int64 = {0xffddffd3ffd5ffea, 0xffdfffd8ffceffd5}, uint128 = 0xffdfffd8ffceffd5ffddffd3ffd5ffea} 
     74xmm2           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x8000000000000000, 0x8000000000000000}, v16_int8 = {0xd5, 0xff, 0xce, 0xff, 0xd8, 0xff, 0xdf, 0xff, 0xd5, 0xff, 
     75    0xce, 0xff, 0xd8, 0xff, 0xdf, 0xff}, v8_int16 = {0xffd5, 0xffce, 0xffd8, 0xffdf, 0xffd5, 0xffce, 0xffd8, 0xffdf}, v4_int32 = {0xffceffd5, 0xffdfffd8, 0xffceffd5, 
     76    0xffdfffd8}, v2_int64 = {0xffdfffd8ffceffd5, 0xffdfffd8ffceffd5}, uint128 = 0xffdfffd8ffceffd5ffdfffd8ffceffd5} 
     77xmm3           {v4_float = {0xffffffaf, 0xffffffcd, 0xffffffdb, 0xfffffff3}, v2_double = {0xffffffc2c6667abb, 0xffffffffff8726c5}, v16_int8 = {0xf6, 0x8d, 0xa2, 0xc2, 0xcc, 
     78    0x9c, 0x4e, 0xc2, 0x3f, 0x3b, 0x15, 0xc2, 0x4e, 0x36, 0x5e, 0xc1}, v8_int16 = {0x8df6, 0xc2a2, 0x9ccc, 0xc24e, 0x3b3f, 0xc215, 0x364e, 0xc15e}, v4_int32 = {0xc2a28df6, 
     79    0xc24e9ccc, 0xc2153b3f, 0xc15e364e}, v2_int64 = {0xc24e9cccc2a28df6, 0xc15e364ec2153b3f}, uint128 = 0xc15e364ec2153b3fc24e9cccc2a28df6} 
     80xmm4           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 = {0x0, 0x40, 0xf, 0x0 <repeats 13 times>}, v8_int16 = {0x4000, 0xf, 0x0, 0x0, 0x0, 0x0, 0x0, 
     81    0x0}, v4_int32 = {0xf4000, 0x0, 0x0, 0x0}, v2_int64 = {0xf4000, 0x0}, uint128 = 0x000000000000000000000000000f4000} 
     82xmm5           {v4_float = {0xffffffd0, 0xffffffdb, 0xffffffe9, 0xfffffff3}, v2_double = {0xfffffffab1300f70, 0xffffffffff8726c5}, v16_int8 = {0x85, 0x27, 0x41, 0xc2, 0x3f, 
     83    0x3b, 0x15, 0xc2, 0x45, 0x95, 0xb9, 0xc1, 0x4e, 0x36, 0x5e, 0xc1}, v8_int16 = {0x2785, 0xc241, 0x3b3f, 0xc215, 0x9545, 0xc1b9, 0x364e, 0xc15e}, v4_int32 = {0xc2412785, 
     84    0xc2153b3f, 0xc1b99545, 0xc15e364e}, v2_int64 = {0xc2153b3fc2412785, 0xc15e364ec1b99545}, uint128 = 0xc15e364ec1b99545c2153b3fc2412785} 
     85xmm6           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = { 
     86    0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0}, uint128 = 0x00000000000000000000000000000000} 
     87xmm7           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = { 
     88    0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0}, uint128 = 0x00000000000000000000000000000000} 
     89xmm8           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 = {0x0, 0x0, 0x0, 0x80, 0x0 <repeats 12 times>}, v8_int16 = {0x0, 0x8000, 0x0, 0x0, 0x0, 0x0, 
     90    0x0, 0x0}, v4_int32 = {0x80000000, 0x0, 0x0, 0x0}, v2_int64 = {0x80000000, 0x0}, uint128 = 0x00000000000000000000000080000000} 
     91xmm9           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = { 
     92    0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0}, uint128 = 0x00000000000000000000000000000000} 
     93xmm10          {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = { 
     94    0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0}, uint128 = 0x00000000000000000000000000000000} 
     95xmm11          {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = { 
     96    0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0}, uint128 = 0x00000000000000000000000000000000} 
     97xmm12          {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = { 
     98    0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0}, uint128 = 0x00000000000000000000000000000000} 
     99xmm13          {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = { 
     100    0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0}, uint128 = 0x00000000000000000000000000000000} 
     101xmm14          {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = { 
     102    0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0}, uint128 = 0x00000000000000000000000000000000} 
     103xmm15          {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = { 
     104    0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0}, uint128 = 0x00000000000000000000000000000000} 
     105mxcsr          0x1fa0   [ PE IM DM ZM OM UM PM ] 
     106(gdb) 
     107}}} 
    4108>  
    5109> Do you know if the Zeranoe build actually contains the AVX-optimized function ff_mix_2_1_a_float_avx? 
    6 I just checked and it does. I've also seen this crash with the sse2 version of the function. 
     110I just checked and it does (both sse and avx versions). 
    7111 
    8112I'm currently investigating something curious I've discovered. It seems my cross-toolchain compiles libvpx with pthreads, but I use w32threads in ffmpeg. Will using both thread libraries be OK?