Opened 11 years ago

Closed 11 years ago

#3017 closed defect (fixed)

ff_emu_edge_vfix21_sse causes SIGILL on Pentium-III

Reported by: qyot27 Owned by:
Priority: normal Component: avcodec
Version: git-master Keywords:
Cc: Blocked By:
Blocking: Reproduced by developer: no
Analyzed by developer: no

Description

After building from git yesterday, I noticed that ffmpeg would crash on some H.264 files (mostly 8-bit ones) but not others (10-bit, 12-bit, 14-bit).

The backtrace from a debug build showed that ff_emu_edge_vfix21_sse was triggering the crash. I also tested on an Athlon64, which didn't exhibit the issue; between that and the SIGILL, I concluded the problem was in the assembly. Using --disable-asm allows it to work correctly on my Pentium-III era Celeron.

Judging from the git log it was more than likely introduced in http://git.videolan.org/?p=ffmpeg.git;a=commit;h=face578d56c2d1375e40d5e2a28acc122132bc55, since builds from a month ago don't exhibit the issue.

The backtrace:

gdb> r -i "Qyot27 - Daybreak [8bit][H.264+AAC] distro.mkv" -vcodec ffvhuff -t 10 test.avi
Starting program: C:\dap\vid\Incoming Files\ffmpeg\ffmpeg.exe -i "Qyot27 - Daybreak [8bit]
[H.264+AAC] distro.mkv" -vcodec ffvhuff -t 10 test.avi
[New Thread 3520.0xdd4]
ffmpeg version N-56805-gdcc92ed Copyright (c) 2000-2013 the FFmpeg developers
  built on Oct  1 2013 17:29:25 with gcc 4.8.1 (GCC)
  libavutil      52. 46.100 / 52. 46.100
  libavcodec     55. 33.101 / 55. 33.101
  libavformat    55. 18.104 / 55. 18.104
  libavdevice    55.  3.100 / 55.  3.100
  libavfilter     3. 88.100 /  3. 88.100
  libavresample   1.  1.  0 /  1.  1.  0
  libswscale      2.  5.100 /  2.  5.100
  libswresample   0. 17.103 /  0. 17.103
  libpostproc    52.  3.100 / 52.  3.100
Input #0, matroska,webm, from 'Qyot27 - Daybreak [8bit][H.264+AAC] distro.mkv':
  Metadata:
    creation_time   : 2013-07-26 22:04:24
  Duration: 00:05:00.47, start: 0.000000, bitrate: 1438 kb/s
    Stream #0:0(eng): Video: h264 (High), yuv420p, 848x480, SAR 1:1 DAR 53:30, 23.98 fps, 23.98 tbr, 1k tbn, 47.95 tbc (default) (forced)
    Stream #0:1(eng): Audio: aac, 44100 Hz, stereo, fltp (default) (forced)
    Stream #0:2: Subtitle: subrip (default) (forced)
    Metadata:
      title           : Dummy Subtitles
    Stream #0:3(eng): Subtitle: ssa
    Metadata:
      title           : Karaoke Subtitles [ASS]
Codec 0x18000 is not in the full list.
    Stream #0:4: Attachment: unknown_codec
    Metadata:
      filename        : Ubuntu-R.ttf
      mimetype        : application/x-truetype-font
    Stream #0:5: Attachment: text
    Metadata:
      filename        : knife.txt
      mimetype        : text/plain
    Stream #0:6: Attachment: png
    Metadata:
      filename        : release_poster.png
      mimetype        : image/png
Output #0, avi, to 'test.avi':
  Metadata:
    ISFT            : Lavf55.18.104
    Stream #0:0(eng): Video: ffvhuff (FFVH / 0x48564646), yuv420p, 848x480 [SAR 1:1 DAR 53:30], q=2-31, 200 kb/s, 23.98 tbn, 23.98 tbc (default) (forced)
    Stream #0:1(eng): Audio: mp3 (libmp3lame) (U[0][0][0] / 0x0055), 44100 Hz, stereo, fltp (default) (forced)
Stream mapping:
  Stream #0:0 -> #0:0 (h264 -> ffvhuff)
  Stream #0:1 -> #0:1 (aac -> libmp3lame)
Press [q] to stop, [?] for help

Program received signal SIGILL, Illegal instruction.
0x00abf799 in ff_emu_edge_vfix21_sse ()
(gdb) bt
#0  0x00abf799 in ff_emu_edge_vfix21_sse ()
#1  0x00a67f6d in emulated_edge_mc (h_extend_var=<optimized out>,
    hfix_tbl=0x14cd5c0 <hfixtbl_sse>, v_extend_var=<optimized out>,
    vfix_tbl=0x14cd640 <vfixtbl_sse>, h=480, w=138727022, src_y=<optimized out>,
    src_x=<optimized out>, block_h=21, block_w=<optimized out>,
    src_stride=<optimized out>,
    src=0x8711d8b "\027\027\027", '\030' <repeats 17 times>, "\027\027\027\026\026\026\026
\026\026\026\026\027\027\030\030\030\030\027\027\027\027\027\030\030\030\027\027\027\027\0
27\027", '\026' <repeats 38 times>, '\027' <repeats 12 times>, "\030\027\027\026\026\026\0
27\027\027\027\027\026\026\026\027\027\026\026\026\026\026\026\026\026\026\026", '\027' <r
epeats 14 times>, "\026\026\026\026", '\027' <repeats 26 times>, "\030\031\027\032\027\031
\026\026\026\026", '\027' <repeats 19 times>..., dst_stride=<optimized out>,
    dst=<optimized out>) at libavcodec/x86/videodsp_init.c:175
#2  emulated_edge_mc_sse (buf=0x8334b60 "\027\030", '\027' <repeats 14 times>,
    buf_stride=896, src=0x87119f6 "", src_stride=896, block_w=21, block_h=21, src_x=6,
    src_y=-1, w=848, h=480) at libavcodec/x86/videodsp_init.c:222
#3  0x00613cb3 in mc_dir_part (chroma_idc=1, pixel_shift=0,
    chroma_op=0xaad4b0 <ff_put_h264_chroma_mc4_mmx>, qpix_op=0x844bd54, src_y_offset=0,
    src_x_offset=4, dest_cr=0x8870214 "", dest_cb=0x8853ff4 "", dest_y=0x87e6298 "",
    list=0, delta=7168, height=16, square=0, n=4, pic=0x847c358, h=0x844bc60)
    at libavcodec/h264.c:935
#4  mc_part_std (chroma_idc=1, pixel_shift=0, list1=0, list0=8192,
    chroma_avg=0xaadc60 <ff_avg_h264_chroma_mc4_mmxext>, qpix_avg=0x844be54,
    chroma_put=0xaad4b0 <ff_put_h264_chroma_mc4_mmx>, qpix_put=0x844bd54, y_offset=0,
    x_offset=4, dest_cr=0x8870214 "", dest_cb=0x8853ff4 "", dest_y=0x87e6298 "",
    delta=7168, height=16, square=0, n=4, h=0x844bc60) at libavcodec/h264.c:1045
#5  mc_part_420_simple_8 (h=h@entry=0x844bc60, n=n@entry=4, square=square@entry=0,
    height=height@entry=16, delta=7168,
    dest_y=dest_y@entry=0x87e6290 "\026\026\026\027\027\027\027\030",
    dest_cb=dest_cb@entry=0x8853ff0 "\200\200\200\200",
    dest_cr=dest_cr@entry=0x8870210 "\177\177\177\177", x_offset=x_offset@entry=4,
    y_offset=y_offset@entry=0, qpix_put=qpix_put@entry=0x844bd54,
    chroma_put=0xaad4b0 <ff_put_h264_chroma_mc4_mmx>,
    qpix_avg=qpix_avg@entry=0x844be54,
    chroma_avg=0xaadc60 <ff_avg_h264_chroma_mc4_mmxext>,
    weight_op=weight_op@entry=0x844bc70, weight_avg=weight_avg@entry=0x844bc80,
    list0=list0@entry=8192, list1=0) at libavcodec/h264_mc_template.c:56
#6  0x0062cdb2 in hl_motion_420_simple_8 (weight_avg=<optimized out>,
    weight_op=<optimized out>, chroma_avg=<optimized out>, qpix_avg=<optimized out>,
    chroma_put=<optimized out>, qpix_put=<optimized out>, dest_cr=<optimized out>,
    dest_cb=<optimized out>, dest_y=<optimized out>, h=<optimized out>)
    at libavcodec/h264_mc_template.c:98
#7  hl_decode_mb_simple_8 (h=h@entry=0x844bc60) at libavcodec/h264_mb_template.c:180
#8  0x00634d28 in ff_h264_hl_decode_mb (h=0x844bc60) at libavcodec/h264.c:2598
#9  decode_slice (avctx=avctx@entry=0x815e2e0, arg=arg@entry=0x22f7fc)
    at libavcodec/h264.c:4439
#10 0x0063535f in execute_decode_slices (h=h@entry=0x844bc60,
    context_count=context_count@entry=1) at libavcodec/h264.c:4590
#11 0x0063d7c9 in decode_nal_units (parse_extradata=0, buf_size=6402, buf=0x8531a20 "",
    h=0x844bc60) at libavcodec/h264.c:4942
#12 decode_frame (avctx=0x815e2e0, data=0x850a500, got_frame=0x22fb2c, avpkt=0x22f900)
    at libavcodec/h264.c:5079
#13 0x005eb46f in avcodec_decode_video2 (avctx=<optimized out>,
    picture=<optimized out>, picture@entry=0x850a500, got_picture_ptr=<optimized out>,
    got_picture_ptr@entry=0x22fb2c, avpkt=<optimized out>, avpkt@entry=0x22fd80)
    at libavcodec/utils.c:1994
#14 0x0041049f in decode_video (ist=ist@entry=0x8533ac0, pkt=pkt@entry=0x22fd80,
    got_output=got_output@entry=0x22fb2c) at ffmpeg.c:1668
#15 0x00415e4b in output_packet (pkt=0x22fd30, ist=0x8533ac0) at ffmpeg.c:1866
#16 process_input (file_index=<optimized out>) at ffmpeg.c:3089
#17 0x01424d09 in transcode_step () at ffmpeg.c:3185
#18 transcode () at ffmpeg.c:3237
#19 main (argc=8, argv=<optimized out>) at ffmpeg.c:3418
(gdb) disass $pc-32,$pc+32
Dump of assembler code from 0xabf779 to 0xabf7b9:
   0x00abf779 <ff_emu_edge_vfix21_sse+9>:       adc    $0x8b,%al
   0x00abf77b <ff_emu_edge_vfix21_sse+11>:      push   %esp
   0x00abf77c <ff_emu_edge_vfix21_sse+12>:      and    $0x1c,%al
   0x00abf77e <ff_emu_edge_vfix21_sse+14>:      mov    0x20(%esp),%ebx
   0x00abf782 <ff_emu_edge_vfix21_sse+18>:      mov    0x24(%esp),%esi
   0x00abf786 <ff_emu_edge_vfix21_sse+22>:      sub    %ebx,%esi
   0x00abf788 <ff_emu_edge_vfix21_sse+24>:      sub    %edx,%ebx
   0x00abf78a <ff_emu_edge_vfix21_sse+26>:      test   %edx,%edx
   0x00abf78c <ff_emu_edge_vfix21_sse+28>:      je     0xabf7a5 <ff_emu_edge_vfix21_sse+53
>
   0x00abf78e <ff_emu_edge_vfix21_sse+30>:      movups (%ecx),%xmm0
   0x00abf791 <ff_emu_edge_vfix21_sse+33>:      movq   0xd(%ecx),%xmm1
   0x00abf796 <ff_emu_edge_vfix21_sse+38>:      movups %xmm0,(%eax)
=> 0x00abf799 <ff_emu_edge_vfix21_sse+41>:      movq   %xmm1,0xd(%eax)
   0x00abf79e <ff_emu_edge_vfix21_sse+46>:      add    0x10(%esp),%eax
   0x00abf7a2 <ff_emu_edge_vfix21_sse+50>:      dec    %edx
   0x00abf7a3 <ff_emu_edge_vfix21_sse+51>:      jne    0xabf796 <ff_emu_edge_vfix21_sse+38
>
   0x00abf7a5 <ff_emu_edge_vfix21_sse+53>:      movups (%ecx),%xmm0
   0x00abf7a8 <ff_emu_edge_vfix21_sse+56>:      movq   0xd(%ecx),%xmm1
   0x00abf7ad <ff_emu_edge_vfix21_sse+61>:      movups %xmm0,(%eax)
   0x00abf7b0 <ff_emu_edge_vfix21_sse+64>:      movq   %xmm1,0xd(%eax)
   0x00abf7b5 <ff_emu_edge_vfix21_sse+69>:      add    0x10(%esp),%eax
End of assembler dump.
(gdb)

Change History (5)

comment:1 by qyot27, 11 years ago

Oh, I forgot the configuration info.

>ffmpeg -buildconf
ffmpeg version r56804 git-a2db837 Copyright (c) 2000-2013 the FFmpeg developers
  built on Oct  1 2013 00:44:09 with gcc 4.8.1 (GCC)
  libavutil      52. 46.100 / 52. 46.100
  libavcodec     55. 33.101 / 55. 33.101
  libavformat    55. 18.104 / 55. 18.104
  libavdevice    55.  3.100 / 55.  3.100
  libavfilter     3. 88.100 /  3. 88.100
  libavresample   1.  1.  0 /  1.  1.  0
  libswscale      2.  5.100 /  2.  5.100
  libswresample   0. 17.103 /  0. 17.103
  libpostproc    52.  3.100 / 52.  3.100

  configuration:
    --prefix=/home/qyot27/win32_build
    --cross-prefix=i686-w64-mingw32-
    --enable-gpl
    --enable-version3
    --disable-w32threads
    --enable-avresample
    --enable-concise-version
    --enable-fontconfig
    --enable-libfreetype
    --enable-libass
    --enable-libbluray
    --enable-libcaca
    --enable-libcdio
    --enable-openal
    --enable-libquvi
    --enable-librtmp
    --enable-libsoxr
    --enable-libvidstab
    --enable-libflite
    --enable-libgme
    --enable-libgsm
    --enable-libilbc
    --enable-libmp3lame
    --enable-libnut
    --enable-libopencore-amrnb
    --enable-libopencore-amrwb
    --enable-libopenjpeg
    --enable-libopus
    --enable-libschroedinger
    --enable-libshine
    --enable-libspeex
    --enable-libtheora
    --enable-libtwolame
    --enable-libutvideo
    --enable-libvo-aacenc
    --enable-libvo-amrwbenc
    --enable-libvorbis
    --enable-libvpx
    --enable-libwavpack
    --enable-libx264
    --enable-libxavs
    --enable-libxvid
    --enable-avisynth
    --cpu=pentium3
    --extra-cflags='-mfpmath=sse -march=pentium3 -msse -mtune=pentium3 -DPTW32_STATIC_LIB
-DCACA_STATIC'
    --extra-ldflags='-mconsole -Wl,--allow-multiple-definition'
    --target-os=mingw32
    --arch=x86
    --pkg-config='pkg-config --static'

comment:2 by qyot27, 11 years ago

A workaround of sorts is to change EXTERNAL_SSE on line 247 of libavcodec/x86/videodsp_init.c to EXTERNAL_SSE2, so that it doesn't trip the problem on old CPUs. I don't know if that's an acceptable fix for the issue, but it stops the crash.

comment:3 by qyot27, 11 years ago

After more investigation, the exact line that causes the SIGILL is line 213 in libavcodec/x86/videodsp.asm

comment:4 by qyot27, 11 years ago

New backtrace from a new build, including info all-registers

gdb> r -i "Qyot27 - Daybreak [8bit][H.264+AAC] distro.mkv" -vcodec ffvhuff -an -t 2 output.avi
Starting program: C:\dap\vid\Incoming Files\ffmpeg.exe -i "Qyot27 - Daybreak [8bit][H.264+
AAC] distro.mkv" -vcodec ffvhuff -an -t 2 output.avi
[New Thread 676.0x2a0]
ffmpeg version r56998 git-0f322fe Copyright (c) 2000-2013 the FFmpeg developers
  built on Oct  9 2013 12:58:03 with gcc 4.8.1 (GCC)
  configuration: --prefix=/home/qyot27/win32_build --cross-prefix=i686-w64-mingw32- --enab
le-gpl --enable-version3 --disable-w32threads --enable-debug --disable-stripping --enable-
avisynth --cpu=pentium3 --extra-cflags='-mfpmath=sse -march=pentium3 -msse -mtune=pentium3
 -DPTW32_STATIC_LIB' --target-os=mingw32 --arch=x86
  libavutil      52. 46.101 / 52. 46.101
  libavcodec     55. 35.100 / 55. 35.100
  libavformat    55. 19.100 / 55. 19.100
  libavdevice    55.  4.100 / 55.  4.100
  libavfilter     3. 88.101 /  3. 88.101
  libswscale      2.  5.101 /  2.  5.101
  libswresample   0. 17.103 /  0. 17.103
  libpostproc    52.  3.100 / 52.  3.100
Input #0, matroska,webm, from 'Qyot27 - Daybreak [8bit][H.264+AAC] distro.mkv':
  Metadata:
    creation_time   : 2013-07-26 22:04:24
  Duration: 00:05:00.47, start: 0.000000, bitrate: 1438 kb/s
    Stream #0:0(eng): Video: h264 (High), yuv420p, 848x480, SAR 1:1 DAR 53:30, 23.98 fps,
23.98 tbr, 1k tbn, 47.95 tbc (default) (forced)
    Stream #0:1(eng): Audio: aac, 44100 Hz, stereo, fltp (default) (forced)
    Stream #0:2: Subtitle: subrip (default) (forced)
    Metadata:
      title           : Dummy Subtitles
    Stream #0:3(eng): Subtitle: ssa
    Metadata:
      title           : Karaoke Subtitles [ASS]
Codec 0x18000 is not in the full list.
    Stream #0:4: Attachment: unknown_codec
    Metadata:
      filename        : Ubuntu-R.ttf
      mimetype        : application/x-truetype-font
    Stream #0:5: Attachment: text
    Metadata:
      filename        : knife.txt
      mimetype        : text/plain
    Stream #0:6: Attachment: png
    Metadata:
      filename        : release_poster.png
      mimetype        : image/png
Output #0, avi, to 'output.avi':
  Metadata:
    ISFT            : Lavf55.19.100
    Stream #0:0(eng): Video: ffvhuff (FFVH / 0x48564646), yuv420p, 848x480 [SAR 1:1 DAR 53
:30], q=2-31, 200 kb/s, 23.98 tbn, 23.98 tbc (default) (forced)
Stream mapping:
  Stream #0:0 -> #0:0 (h264 -> ffvhuff)
Press [q] to stop, [?] for help

Program received signal SIGILL, Illegal instruction.
0x00ad8859 in ff_emu_edge_vfix21_sse ()
(gdb) bt
#0  0x00ad8859 in ff_emu_edge_vfix21_sse ()
#1  0x00a7b99d in emulated_edge_mc (h_extend_var=<optimized out>,
    hfix_tbl=0xc75fc0 <hfixtbl_sse>, v_extend_var=<optimized out>,
    vfix_tbl=0xc76040 <vfixtbl_sse>, h=480, w=92267534, src_y=<optimized out>,
    src_x=<optimized out>, block_h=21, block_w=<optimized out>,
    src_stride=<optimized out>,
    src=0x5a2f82b "\027\027\027", '\030' <repeats 17 times>, "\027\027\027\026\026\026\026
\026\026\026\026\027\027\030\030\030\030\027\027\027\027\027\030\030\030\027\027\027\027\0
27\027", '\026' <repeats 38 times>, '\027' <repeats 12 times>, "\030\027\027\026\026\026\0
27\027\027\027\027\026\026\026\027\027\026\026\026\026\026\026\026\026\026\026", '\027' <r
epeats 14 times>, "\026\026\026\026", '\027' <repeats 26 times>, "\030\031\027\032\027\031
\026\026\026\026", '\027' <repeats 19 times>..., dst_stride=<optimized out>,
    dst=<optimized out>) at libavcodec/x86/videodsp_init.c:175
#2  emulated_edge_mc_sse (buf=0x5acefe0 "\027\030", '\027' <repeats 14 times>,
    buf_stride=896, src=0x5a2f496 "", src_stride=896, block_w=21, block_h=21, src_x=6,
    src_y=-1, w=848, h=480) at libavcodec/x86/videodsp_init.c:222
#3  0x00601db3 in mc_dir_part (chroma_idc=1, pixel_shift=0,
    chroma_op=0xac6570 <ff_put_h264_chroma_mc4_mmx>, qpix_op=0x57fd2f4, src_y_offset=0,
    src_x_offset=4, dest_cr=0x5b74834 "", dest_cb=0x5b58614 "", dest_y=0x5aea8b8 "",
    list=0, delta=7168, height=16, square=0, n=4, pic=0x582d8f8, h=0x57fd200)
    at libavcodec/h264.c:935
#4  mc_part_std (chroma_idc=1, pixel_shift=0, list1=0, list0=8192,
    chroma_avg=0xac6d20 <ff_avg_h264_chroma_mc4_mmxext>, qpix_avg=0x57fd3f4,
    chroma_put=0xac6570 <ff_put_h264_chroma_mc4_mmx>, qpix_put=0x57fd2f4, y_offset=0,
    x_offset=4, dest_cr=0x5b74834 "", dest_cb=0x5b58614 "", dest_y=0x5aea8b8 "",
    delta=7168, height=16, square=0, n=4, h=0x57fd200) at libavcodec/h264.c:1045
#5  mc_part_420_simple_8 (h=h@entry=0x57fd200, n=n@entry=4, square=square@entry=0,
    height=height@entry=16, delta=7168,
    dest_y=dest_y@entry=0x5aea8b0 "\026\026\026\027\027\027\027\030",
    dest_cb=dest_cb@entry=0x5b58610 "\200\200\200\200",
    dest_cr=dest_cr@entry=0x5b74830 "\177\177\177\177", x_offset=x_offset@entry=4,
    y_offset=y_offset@entry=0, qpix_put=qpix_put@entry=0x57fd2f4,
    chroma_put=0xac6570 <ff_put_h264_chroma_mc4_mmx>,
    qpix_avg=qpix_avg@entry=0x57fd3f4,
    chroma_avg=0xac6d20 <ff_avg_h264_chroma_mc4_mmxext>,
    weight_op=weight_op@entry=0x57fd210, weight_avg=weight_avg@entry=0x57fd220,
    list0=list0@entry=8192, list1=0) at libavcodec/h264_mc_template.c:56
#6  0x0061aeb2 in hl_motion_420_simple_8 (weight_avg=<optimized out>,
    weight_op=<optimized out>, chroma_avg=<optimized out>, qpix_avg=<optimized out>,
    chroma_put=<optimized out>, qpix_put=<optimized out>, dest_cr=<optimized out>,
    dest_cb=<optimized out>, dest_y=<optimized out>, h=<optimized out>)
    at libavcodec/h264_mc_template.c:98
#7  hl_decode_mb_simple_8 (h=h@entry=0x57fd200) at libavcodec/h264_mb_template.c:180
#8  0x00622e28 in ff_h264_hl_decode_mb (h=0x57fd200) at libavcodec/h264.c:2598
#9  decode_slice (avctx=avctx@entry=0x55fc180, arg=arg@entry=0x22f7fc)
    at libavcodec/h264.c:4439
#10 0x0062345f in execute_decode_slices (h=h@entry=0x57fd200,
    context_count=context_count@entry=1) at libavcodec/h264.c:4590
#11 0x0062b8d9 in decode_nal_units (parse_extradata=0, buf_size=6402, buf=0x5964c00 "",
    h=0x57fd200) at libavcodec/h264.c:4942
#12 decode_frame (avctx=0x55fc180, data=0x56d3d20, got_frame=0x22fb2c, avpkt=0x22f900)
    at libavcodec/h264.c:5079
#13 0x005d971f in avcodec_decode_video2 (avctx=<optimized out>,
    picture=<optimized out>, picture@entry=0x56d3d20, got_picture_ptr=<optimized out>,
    got_picture_ptr@entry=0x22fb2c, avpkt=<optimized out>, avpkt@entry=0x22fd80)
    at libavcodec/utils.c:1999
#14 0x0041050f in decode_video (ist=ist@entry=0x58a64e0, pkt=pkt@entry=0x22fd80,
    got_output=got_output@entry=0x22fb2c) at ffmpeg.c:1668
#15 0x00415ebb in output_packet (pkt=0x22fd30, ist=0x58a64e0) at ffmpeg.c:1866
#16 process_input (file_index=<optimized out>) at ffmpeg.c:3089
#17 0x00bf65b9 in transcode_step () at ffmpeg.c:3185
#18 transcode () at ffmpeg.c:3237
#19 main (argc=9, argv=<optimized out>) at ffmpeg.c:3415
(gdb) disass
Dump of assembler code for function ff_emu_edge_vfix21_sse:
   0x00ad8830 <+0>:     push   %ebx
   0x00ad8831 <+1>:     push   %esi
   0x00ad8832 <+2>:     mov    0xc(%esp),%eax
   0x00ad8836 <+6>:     mov    0x14(%esp),%ecx
   0x00ad883a <+10>:    mov    0x1c(%esp),%edx
   0x00ad883e <+14>:    mov    0x20(%esp),%ebx
   0x00ad8842 <+18>:    mov    0x24(%esp),%esi
   0x00ad8846 <+22>:    sub    %ebx,%esi
   0x00ad8848 <+24>:    sub    %edx,%ebx
   0x00ad884a <+26>:    test   %edx,%edx
   0x00ad884c <+28>:    je     0xad8865 <ff_emu_edge_vfix21_sse+53>
   0x00ad884e <+30>:    movups (%ecx),%xmm0
   0x00ad8851 <+33>:    movq   0xd(%ecx),%xmm1
   0x00ad8856 <+38>:    movups %xmm0,(%eax)
=> 0x00ad8859 <+41>:    movq   %xmm1,0xd(%eax)
   0x00ad885e <+46>:    add    0x10(%esp),%eax
   0x00ad8862 <+50>:    dec    %edx
   0x00ad8863 <+51>:    jne    0xad8856 <ff_emu_edge_vfix21_sse+38>
   0x00ad8865 <+53>:    movups (%ecx),%xmm0
   0x00ad8868 <+56>:    movq   0xd(%ecx),%xmm1
   0x00ad886d <+61>:    movups %xmm0,(%eax)
   0x00ad8870 <+64>:    movq   %xmm1,0xd(%eax)
   0x00ad8875 <+69>:    add    0x10(%esp),%eax
   0x00ad8879 <+73>:    add    0x18(%esp),%ecx
   0x00ad887d <+77>:    dec    %ebx
   0x00ad887e <+78>:    jne    0xad8865 <ff_emu_edge_vfix21_sse+53>
   0x00ad8880 <+80>:    test   %esi,%esi
   0x00ad8882 <+82>:    je     0xad889f <ff_emu_edge_vfix21_sse+111>
   0x00ad8884 <+84>:    sub    0x18(%esp),%ecx
   0x00ad8888 <+88>:    movups (%ecx),%xmm0
   0x00ad888b <+91>:    movq   0xd(%ecx),%xmm1
   0x00ad8890 <+96>:    movups %xmm0,(%eax)
   0x00ad8893 <+99>:    movq   %xmm1,0xd(%eax)
   0x00ad8898 <+104>:   add    0x10(%esp),%eax
   0x00ad889c <+108>:   dec    %esi
   0x00ad889d <+109>:   jne    0xad8890 <ff_emu_edge_vfix21_sse+96>
   0x00ad889f <+111>:   pop    %esi
   0x00ad88a0 <+112>:   pop    %ebx
   0x00ad88a1 <+113>:   ret
   0x00ad88a2 <+114>:   jmp    0xad88b0 <ff_emu_edge_vfix22_sse>
   0x00ad88a4 <+116>:   nop
   0x00ad88a5 <+117>:   nop
   0x00ad88a6 <+118>:   nop
   0x00ad88a7 <+119>:   nop
   0x00ad88a8 <+120>:   nop
   0x00ad88a9 <+121>:   nop
   0x00ad88aa <+122>:   nop
   0x00ad88ab <+123>:   nop
   0x00ad88ac <+124>:   nop
   0x00ad88ad <+125>:   nop
   0x00ad88ae <+126>:   nop
   0x00ad88af <+127>:   nop
End of assembler dump.
(gdb) disass $pc-32,$pc+32
Dump of assembler code from 0xad8839 to 0xad8879:
   0x00ad8839 <ff_emu_edge_vfix21_sse+9>:       adc    $0x8b,%al
   0x00ad883b <ff_emu_edge_vfix21_sse+11>:      push   %esp
   0x00ad883c <ff_emu_edge_vfix21_sse+12>:      and    $0x1c,%al
   0x00ad883e <ff_emu_edge_vfix21_sse+14>:      mov    0x20(%esp),%ebx
   0x00ad8842 <ff_emu_edge_vfix21_sse+18>:      mov    0x24(%esp),%esi
   0x00ad8846 <ff_emu_edge_vfix21_sse+22>:      sub    %ebx,%esi
   0x00ad8848 <ff_emu_edge_vfix21_sse+24>:      sub    %edx,%ebx
   0x00ad884a <ff_emu_edge_vfix21_sse+26>:      test   %edx,%edx
   0x00ad884c <ff_emu_edge_vfix21_sse+28>:      je     0xad8865 <ff_emu_edge_vfix21_sse+53
>
   0x00ad884e <ff_emu_edge_vfix21_sse+30>:      movups (%ecx),%xmm0
   0x00ad8851 <ff_emu_edge_vfix21_sse+33>:      movq   0xd(%ecx),%xmm1
   0x00ad8856 <ff_emu_edge_vfix21_sse+38>:      movups %xmm0,(%eax)
=> 0x00ad8859 <ff_emu_edge_vfix21_sse+41>:      movq   %xmm1,0xd(%eax)
   0x00ad885e <ff_emu_edge_vfix21_sse+46>:      add    0x10(%esp),%eax
   0x00ad8862 <ff_emu_edge_vfix21_sse+50>:      dec    %edx
   0x00ad8863 <ff_emu_edge_vfix21_sse+51>:      jne    0xad8856 <ff_emu_edge_vfix21_sse+38
>
   0x00ad8865 <ff_emu_edge_vfix21_sse+53>:      movups (%ecx),%xmm0
   0x00ad8868 <ff_emu_edge_vfix21_sse+56>:      movq   0xd(%ecx),%xmm1
   0x00ad886d <ff_emu_edge_vfix21_sse+61>:      movups %xmm0,(%eax)
   0x00ad8870 <ff_emu_edge_vfix21_sse+64>:      movq   %xmm1,0xd(%eax)
   0x00ad8875 <ff_emu_edge_vfix21_sse+69>:      add    0x10(%esp),%eax
End of assembler dump.
(gdb) info all-registers
eax            0x5acefe0        95219680
ecx            0x5a2f816        94566422
edx            0x1      1
ebx            0x14     20
esp            0x22f5b4 0x22f5b4
ebp            0x15     0x15
esi            0x0      0
edi            0x5acefe0        95219680
eip            0xad8859 0xad8859 <ff_emu_edge_vfix21_sse+41>
eflags         0x10202  [ IF RF ]
cs             0x1b     27
ss             0x23     35
ds             0x23     35
es             0x23     35
fs             0x3b     59
gs             0x0      0
st0            -nan(0x1fe01fe01fe01fe0) (raw 0xffff1fe01fe01fe01fe0)
st1            -nan(0x7f7f7f7f7f7f7f7f) (raw 0xffff7f7f7f7f7f7f7f7f)
st2            -inf     (raw 0xffff0000000000000000)
st3            -inf     (raw 0xffff0000000000000000)
st4            -nan(0x8000800080008)    (raw 0xffff0008000800080008)
st5            -nan(0x8000800080008)    (raw 0xffff0008000800080008)
st6            -nan(0x3f803f803f803f8)  (raw 0xffff03f803f803f803f8)
st7            -inf     (raw 0xffff0000000000000000)
fctrl          0xffff037f       -64641
fstat          0xffff0420       -64480
ftag           0xffffaaaa       -21846
fiseg          0x1b     27
fioff          0x414540 4277568
foseg          0xffff0023       -65501
fooff          0x22fd38 2293048
fop            0x7bc    1980
xmm0           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 = {
    0x17, 0x18, 0x17 <repeats 14 times>}, v8_int16 = {0x1817, 0x1717, 0x1717, 0x1717,
    0x1717, 0x1717, 0x1717, 0x1717}, v4_int32 = {0x17171817, 0x17171717, 0x17171717,
    0x17171717}, v2_int64 = {0x1717171717171817, 0x1717171717171717},
  uint128 = 0x17171717171717171717171717171817}
xmm1           {v4_float = {0xaae60, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
  v16_int8 = {0x0, 0xe6, 0x2a, 0x49, 0x0 <repeats 12 times>}, v8_int16 = {0xe600,
    0x492a, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x492ae600, 0x0, 0x0, 0x0},
  v2_int64 = {0x492ae600, 0x0}, uint128 = 0x000000000000000000000000492ae600}
xmm2           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 = {
    0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
  v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0},
  uint128 = 0x00000000000000000000000000000000}
xmm3           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 = {
    0x0, 0x0, 0x0, 0x80, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x80, 0x0, 0x0, 0x0, 0x0},
  v8_int16 = {0x0, 0x8000, 0x0, 0x0, 0x0, 0x8000, 0x0, 0x0}, v4_int32 = {0x80000000,
    0x0, 0x80000000, 0x0}, v2_int64 = {0x80000000, 0x80000000},
  uint128 = 0x00000000800000000000000080000000}
xmm4           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 = {
    0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
  v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0},
  uint128 = 0x00000000000000000000000000000000}
xmm5           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 = {
    0x0, 0x0, 0x0, 0x80, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x80, 0x0, 0x0, 0x0, 0x0},
  v8_int16 = {0x0, 0x8000, 0x0, 0x0, 0x0, 0x8000, 0x0, 0x0}, v4_int32 = {0x80000000,
    0x0, 0x80000000, 0x0}, v2_int64 = {0x80000000, 0x80000000},
  uint128 = 0x00000000800000000000000080000000}
xmm6           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 = {
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x80, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x80},
  v8_int16 = {0x0, 0x0, 0x0, 0x8000, 0x0, 0x0, 0x0, 0x8000}, v4_int32 = {0x0,
    0x80000000, 0x0, 0x80000000}, v2_int64 = {0x8000000000000000, 0x8000000000000000},
  uint128 = 0x80000000000000008000000000000000}
xmm7           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 = {
    0x0, 0x0, 0x0, 0x80, 0x0, 0x0, 0x0, 0x80, 0x0, 0x0, 0x0, 0x80, 0x0, 0x0, 0x0,
    0x80}, v8_int16 = {0x0, 0x8000, 0x0, 0x8000, 0x0, 0x8000, 0x0, 0x8000}, v4_int32 = {
    0x80000000, 0x80000000, 0x80000000, 0x80000000}, v2_int64 = {0x8000000080000000,
    0x8000000080000000}, uint128 = 0x80000000800000008000000080000000}
mxcsr          0x1fa0   [ PE IM DM ZM OM UM PM ]
mm0            {uint64 = 0x1fe01fe01fe01fe0, v2_int32 = {0x1fe01fe0, 0x1fe01fe0},
  v4_int16 = {0x1fe0, 0x1fe0, 0x1fe0, 0x1fe0}, v8_int8 = {0xe0, 0x1f, 0xe0, 0x1f, 0xe0,
    0x1f, 0xe0, 0x1f}}
mm1            {uint64 = 0x7f7f7f7f7f7f7f7f, v2_int32 = {0x7f7f7f7f, 0x7f7f7f7f},
  v4_int16 = {0x7f7f, 0x7f7f, 0x7f7f, 0x7f7f}, v8_int8 = {0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
    0x7f, 0x7f, 0x7f}}
mm2            {uint64 = 0x0, v2_int32 = {0x0, 0x0}, v4_int16 = {0x0, 0x0, 0x0, 0x0},
  v8_int8 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}}
mm3            {uint64 = 0x0, v2_int32 = {0x0, 0x0}, v4_int16 = {0x0, 0x0, 0x0, 0x0},
  v8_int8 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}}
mm4            {uint64 = 0x8000800080008, v2_int32 = {0x80008, 0x80008}, v4_int16 = {
    0x8, 0x8, 0x8, 0x8}, v8_int8 = {0x8, 0x0, 0x8, 0x0, 0x8, 0x0, 0x8, 0x0}}
mm5            {uint64 = 0x8000800080008, v2_int32 = {0x80008, 0x80008}, v4_int16 = {
    0x8, 0x8, 0x8, 0x8}, v8_int8 = {0x8, 0x0, 0x8, 0x0, 0x8, 0x0, 0x8, 0x0}}
mm6            {uint64 = 0x3f803f803f803f8, v2_int32 = {0x3f803f8, 0x3f803f8},
  v4_int16 = {0x3f8, 0x3f8, 0x3f8, 0x3f8}, v8_int8 = {0xf8, 0x3, 0xf8, 0x3, 0xf8, 0x3,
    0xf8, 0x3}}
mm7            {uint64 = 0x0, v2_int32 = {0x0, 0x0}, v4_int16 = {0x0, 0x0, 0x0, 0x0},
  v8_int8 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}}

comment:5 by Michael Niedermayer, 11 years ago

Resolution: fixed
Status: new → closed

This should have been fixed by ronald in 20d78a86064a6de5b63e129417b2a38fd333d71f

Note: See TracTickets for help on using tickets.