Opened 11 years ago

Last modified 11 years ago

#2764 new enhancement

FFMPEG is blocked when multicast group membership is lost (on linux kernel < 2.6.27)

Reported by: asif Owned by:
Priority: normal Component: undetermined
Version: 0.10.7 Keywords: udp
Cc: Blocked By:
Blocking: Reproduced by developer: no
Analyzed by developer: no

Description

Summary of the bug:
FFmpeg blocks until it receives a data packet or a socket error occurs. The problem is that the socket's group membership is flushed from the kernel's group membership table whenever there is a disruption in the network. This happens on CentOS because it uses an older kernel version (2.6.18). Because ffmpeg's udp.c waits to receive packets without checking whether its group membership is still intact, it continues to wait for packets indefinitely.

The Issue was Reproduced on:
CentOS kernel: 2.6.18
ffmpeg version: 0.10

How to reproduce:

  1. Start ffmpeg stream transmission:

$ ./ffmpeg -i INPUT -f mpegts udp://233.19.204.1:5501

  2. Start ffmpeg stream receiver:

$ ./ffplay udp://233.19.204.1:5501

  3. The multicast group membership can be lost due to a disruption in the network. One way to trigger this is to restart the network service:

$ /etc/init.d/network restart

The receiver will keep on waiting for the udp packets without checking the multicast group membership.

Note:
The issue happens only on kernel versions < 2.6.27. The multicast group membership problem appears to have been fixed in Linux kernel version 2.6.27:
http://mirror.linux.org.au/linux/kernel/v2.6/ChangeLog-2.6.27

However, some Linux distributions, including CentOS 5.7, still use kernel 2.6.18, which has this issue.

Possible Fix:

In libavformat/udp.c, check the multicast group membership as below:

#ifdef _MULTICAST_HANDLELOSTMEMBERSHIP

#define PATH_PROCNET_IGMP              "/proc/net/igmp"
//TODO: Support ipv6
/**
 * Check whether an IPv4 multicast group is still listed in PATH_PROCNET_IGMP.
 *
 * @param _URLContext URLContext pointer, used only as the av_log() context
 * @param addr        group address to look for; must be AF_INET
 * @return 1 if a line containing the group address was found,
 *         0 if the file was read to the end without a match,
 *         -1 if the address family is not AF_INET, the file cannot be
 *         opened or read, or its first line lacks the "Device" header
 */
static int udp_check_multicastgroupmembership(void *_URLContext, struct sockaddr *addr)
{
    FILE *f_igmp;
    char igmp_line[8192];
    char target_addr[9];    /* 8 hex digits + terminating NUL */
    int result = 0;
    URLContext *h = _URLContext;

    if (addr->sa_family != AF_INET) {
        av_log(h, AV_LOG_INFO, "udp.c: IPPROTO_IPV6 NOT SUPPORTED\n");
        return -1; //NOT SUPPORTED
    }

    f_igmp = fopen(PATH_PROCNET_IGMP, "r");
    if (!f_igmp) {
        av_log(h, AV_LOG_ERROR, "udp.c: Unable to open %s\n", PATH_PROCNET_IGMP);
        return -1;
    }

    /* The kernel is expected to print each group as eight zero-padded hex
     * digits of the raw address word. Padding here prevents a short string
     * such as "10000E0" from also matching an unrelated group like
     * "110000E0". */
    snprintf(target_addr, sizeof(target_addr), "%08X",
             (unsigned int)((struct sockaddr_in *)addr)->sin_addr.s_addr);

    /* First line is the column header; anything else means an unknown layout. */
    if (fgets(igmp_line, sizeof(igmp_line), f_igmp)) {
        if (strstr(igmp_line, "Device") == NULL) {
            av_log(h, AV_LOG_INFO, "udp.c: Unexpected header in %s\n", PATH_PROCNET_IGMP);
            fclose(f_igmp);
            return -1;
        }
    }

    /* Loop on fgets() itself: testing feof() alone never terminates when
     * fgets() fails because of a read error instead of end-of-file. */
    while (fgets(igmp_line, sizeof(igmp_line), f_igmp)) {
        if (strstr(igmp_line, target_addr) != NULL) {
            result = 1;
            break;
        }
    }

    /* A read error must not be reported as "membership lost". */
    if (result == 0 && ferror(f_igmp))
        result = -1;

    fclose(f_igmp);
    return result;
}
#endif

The function can be called from the circular_buffer_task function as below:

#if HAVE_PTHREADS
/**
 * Receiver thread body: moves datagrams from the UDP socket into s->fifo.
 *
 * Runs until s->exit_thread is set. Each received datagram is written to the
 * FIFO as a 4-byte length (stored with AV_WL32) followed by the payload, and
 * s->cond is signalled under s->mutex after every write. On interrupt,
 * select()/recv() failure or FIFO overrun, s->circular_buffer_error is set to
 * EIO and the thread exits after signalling s->cond one last time.
 *
 * When _MULTICAST_HANDLELOSTMEMBERSHIP is defined, timeout_max_value
 * consecutive 1-second select() timeouts on a multicast read socket trigger
 * a membership check; if the group is reported missing, the socket leaves
 * and re-joins the group.
 *
 * @param _URLContext URLContext pointer whose priv_data is the UDPContext
 * @return always NULL
 */
static void *circular_buffer_task( void *_URLContext)
{
    URLContext *h = _URLContext;
    UDPContext *s = h->priv_data;
    fd_set rfds;
    struct timeval tv;

#ifdef _MULTICAST_HANDLELOSTMEMBERSHIP
    int timeout_count = 0;
    const int timeout_max_value = 5;    // consecutive 1-second select() timeouts
#endif
    while(!s->exit_thread) {
        int left;
        int ret;
        int len;

        if (ff_check_interrupt(&h->interrupt_callback)) {
            s->circular_buffer_error = EIO;
            goto end;
        }

        /* Wait at most one second so exit_thread and the interrupt callback
         * are polled regularly. */
        FD_ZERO(&rfds);
        FD_SET(s->udp_fd, &rfds);
        tv.tv_sec = 1;
        tv.tv_usec = 0;
        ret = select(s->udp_fd + 1, &rfds, NULL, NULL, &tv);
        if (ret < 0) {
            if (ff_neterrno() == AVERROR(EINTR)) {
                av_log(h, AV_LOG_INFO, "Got ERROR EINTR\n");
                continue;
            }
            s->circular_buffer_error = EIO;
            goto end;
        }
#ifdef _MULTICAST_HANDLELOSTMEMBERSHIP
        else if(ret == 0 && s->is_multicast && (h->flags & AVIO_FLAG_READ))
        {
            if(++timeout_count >= timeout_max_value)
            {
                av_log(h, AV_LOG_DEBUG, "No Packet for %d seconds\n", timeout_max_value);
                //check if the problem is due to lost group membership
                if(0 == udp_check_multicastgroupmembership(h, (struct sockaddr *)&s->dest_addr))
                {
                    //if the problem is due to lost multicast group membership, reinitialize group membership
                    av_log(h, AV_LOG_DEBUG, "Restoring group membership\n");
                    /* Best effort: the leave result is ignored, only a failed
                     * re-join is reported. */
                    udp_leave_multicast_group(s->udp_fd, (struct sockaddr *)&s->dest_addr);
                    if (udp_join_multicast_group(s->udp_fd, (struct sockaddr *)&s->dest_addr) < 0)
                    {
                        av_log(h, AV_LOG_ERROR, "udp.c: groupmembership retry failed\n");
                    }
                }

                timeout_count = 0;
            }
        }
#endif

        if (!(ret > 0 && FD_ISSET(s->udp_fd, &rfds)))
            continue;

#ifdef _MULTICAST_HANDLELOSTMEMBERSHIP
        /* The socket is readable, so the stream is alive: only consecutive
         * timeouts may count towards the membership check. */
        timeout_count = 0;
#endif

        /* How much room is left in the FIFO */
        left = av_fifo_space(s->fifo);

        /* No space left for a maximum-size packet plus its length prefix */
        if(left < UDP_MAX_PKT_SIZE + 4) {
            av_log(h, AV_LOG_ERROR, "circular_buffer: OVERRUN\n");
            s->circular_buffer_error = EIO;
            goto end;
        }
        /* Receive behind the 4 bytes reserved for the length prefix */
        len = recv(s->udp_fd, s->tmp+4, sizeof(s->tmp)-4, 0);
        if (len < 0) {
            if (ff_neterrno() != AVERROR(EAGAIN) && ff_neterrno() != AVERROR(EINTR)) {
                s->circular_buffer_error = EIO;
                goto end;
            }
            continue;
        }
        AV_WL32(s->tmp, len);
        pthread_mutex_lock(&s->mutex);
        av_fifo_generic_write(s->fifo, s->tmp, len+4, NULL);
        pthread_cond_signal(&s->cond);
        pthread_mutex_unlock(&s->mutex);
    }

end:
    /* Signal s->cond so a waiter can observe the error or the exit */
    pthread_mutex_lock(&s->mutex);
    pthread_cond_signal(&s->cond);
    pthread_mutex_unlock(&s->mutex);
    return NULL;
}
#endif

Change History (1)

comment:1 by Carl Eugen Hoyos, 11 years ago

Keywords: multicast blocked lost group membership removed

Please test current git head and please provide the console output of the failing command to make this a valid ticket.

If you have a patch that fixes this issue, please send it to the ffmpeg-devel mailing list, patches receive more attention there.

Note: See TracTickets for help on using tickets.