Context Navigation

#3556 closed defect (duplicate)

bug when resampling stereo to stereo

Reported by:	Oleg	Owned by:
Priority:	normal	Component:	undetermined
Version:	unspecified	Keywords:
Cc:		Blocked By:
Blocking:		Reproduced by developer:	no
Analyzed by developer:	no

Description

I found a bug in ffmpeg.

#include "stdafx.h"
#include <iostream>

extern "C"
{
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
//#include "swscale.h"
#include "libswresample/swresample.h"
};

// Input stream handle; not referenced by the code shown in this report.
FILE* fin;
// Output stream that ffmpeg_audio_decode() opens, writes raw PCM to, and closes.
FILE* fout;

int ffmpeg_audio_decode( const char * inFile, const char * outFile)
{
	// Initialize FFmpeg
	av_register_all();

	AVFrame* frame = avcodec_alloc_frame();
	if (!frame)
	{
		std::cout << "Error allocating the frame" << std::endl;
		return 1;
	}

	// you can change the file name "01 Push Me to the Floor.wav" to whatever the file is you're reading, like "myFile.ogg" or
	// "someFile.webm" and this should still work
	AVFormatContext* formatContext = NULL;
	//if (avformat_open_input(&formatContext, "01 Push Me to the Floor.wav", NULL, NULL) != 0)
	if (avformat_open_input(&formatContext, inFile, NULL, NULL) != 0)
	{
		av_free(frame);
		std::cout << "Error opening the file" << std::endl;
		return 1;
	}
	
	if (avformat_find_stream_info(formatContext, NULL) < 0)
	{
		av_free(frame);
		av_close_input_file(formatContext);
		std::cout << "Error finding the stream info" << std::endl;
		return 1;
	}

	AVStream* audioStream = NULL;
	// Find the audio stream (some container files can have multiple streams in them)
	for (unsigned int i = 0; i < formatContext->nb_streams; ++i)
	{
		if (formatContext->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO)
		{
			audioStream = formatContext->streams[i];
			break;
		}
	}

	if (audioStream == NULL)
	{
		av_free(frame);
		av_close_input_file(formatContext);
		std::cout << "Could not find any audio stream in the file" << std::endl;
		return 1;
	}

	AVCodecContext* codecContext = audioStream->codec;

	codecContext->codec = avcodec_find_decoder(codecContext->codec_id);
	if (codecContext->codec == NULL)
	{
		av_free(frame);
		av_close_input_file(formatContext);
		std::cout << "Couldn't find a proper decoder" << std::endl;
		return 1;
	}
	else if (avcodec_open2(codecContext, codecContext->codec, NULL) != 0)
	{
		av_free(frame);
		av_close_input_file(formatContext);
		std::cout << "Couldn't open the context with the decoder" << std::endl;
		return 1;
	}

	std::cout << "This stream has " << codecContext->channels << " channels and a sample rate of " << codecContext->sample_rate << "Hz" << std::endl;
	std::cout << "The data is in the format " << av_get_sample_fmt_name(codecContext->sample_fmt) << std::endl;

	//codecContext->sample_fmt = AV_SAMPLE_FMT_S16;

	int64_t outChannelLayout = AV_CH_LAYOUT_STEREO;//AV_CH_LAYOUT_MONO; //AV_CH_LAYOUT_STEREO;
	AVSampleFormat outSampleFormat = AV_SAMPLE_FMT_S16; // Packed audio, non-planar (this is the most common format, and probably what you want; also, WAV needs it)
	int outSampleRate = 44100;//8000;//44100;
/*
	Wav wav;
	wav.sampleRate = outSampleRate;
	wav.sampleSize = av_get_bytes_per_sample(outSampleFormat);
	wav.channels = av_get_channel_layout_nb_channels(outChannelLayout);
*/
	// Note that AVCodecContext::channel_layout may or may not be set by libavcodec. Because of this,
	// we won't use it, and will instead try to guess the layout from the number of channels.
	SwrContext* swrContext = swr_alloc_set_opts(NULL,
		outChannelLayout,
		outSampleFormat,
		outSampleRate,
		av_get_default_channel_layout(codecContext->channels),
		codecContext->sample_fmt,
		codecContext->sample_rate,
		0,
		NULL);

	if (swrContext == NULL)
	{
		av_free(frame);
		avcodec_close(codecContext);
		avformat_close_input(&formatContext);
		std::cout << "Couldn't create the SwrContext" << std::endl;
		return 1;
	}

	if (swr_init(swrContext) != 0)
	{
		av_free(frame);
		avcodec_close(codecContext);
		avformat_close_input(&formatContext);
		swr_free(&swrContext);
		std::cout << "Couldn't initialize the SwrContext" << std::endl;
		return 1;
	}

	fout = fopen(outFile, "wb+");

	AVPacket packet;
	av_init_packet(&packet);

	// Read the packets in a loop
	while (av_read_frame(formatContext, &packet) == 0)
	{
		if (packet.stream_index == audioStream->index)
		{
			AVPacket decodingPacket = packet;

			while (decodingPacket.size > 0)
			{
				// Try to decode the packet into a frame
				int frameFinished = 0;
				int result = avcodec_decode_audio4(codecContext, frame, &frameFinished, &decodingPacket);

				if (result < 0 || frameFinished == 0)
				{
					break;
				}

				//std::vector<unsigned char> buffer(wav.channels * wav.sampleRate * wav.sampleSize);

				unsigned char buffer[100000] = {NULL};
				unsigned char* pointers[SWR_CH_MAX] = {NULL};
				pointers[0] = &buffer[0];
				
				int numSamplesOut = swr_convert(swrContext, 
												pointers,
												outSampleRate, //wav.sampleRate,
												(const unsigned char**)frame->extended_data,
												//(const uint8_t**)frame->extended_data[0],
												frame->nb_samples);

				//processFrame(frame, swrContext, wav);
				
				//fwrite(  frame->data[0], sizeof(short), (size_t)(frame->nb_samples), fout);
				//fwrite(  frame->extended_data, sizeof(short), (size_t)(frame->nb_samples), fout);
				//uint16_t uiCnt_1 = (uint16_t )frame->extended_data[0];
				//uint16_t uiCnt_2 = (uint16_t )frame->extended_data[1];

				/*
				ReSampleContext *rs_ctx = NULL;
				// resample to 44100, stereo, s16
				rs_ctx = av_audio_resample_init(
					1, codecContext->channels,
					8000, codecContext->sample_rate,
					AV_SAMPLE_FMT_S16, codecContext->sample_fmt,
					16, 10, 0, 1);
				
				//outbuff = (uint8_t*)av_malloc(AVCODEC_MAX_AUDIO_FRAME_SIZE);
				short bufferSh[100000] = {NULL};
				// resampling
				//int after_sampled_len = audio_resample(rs_ctx, (short *)buffer, (short *)frame->extended_data[0], frame->nb_samples);
				int after_sampled_len = audio_resample(rs_ctx, bufferSh, (short *)frame->extended_data, frame->nb_samples);
				*/

				fwrite(  (short *)buffer, sizeof(short), (size_t)numSamplesOut, fout);

				decodingPacket.size -= result;
				decodingPacket.data += result;
			}

			/*
			// Try to decode the packet into a frame
			int frameFinished = 0;
			avcodec_decode_audio4(codecContext, frame, &frameFinished, &packet);

			// Some frames rely on multiple packets, so we have to make sure the frame is finished before
			// we can use it
			if (frameFinished)
			{

				//fwrite(  (short *)&(frame->data[0]), sizeof(short), (size_t)(frame->nb_samples*2), fout);
				fwrite(  (short *)&(frame->data[0]), sizeof(short), (size_t)(frame->nb_samples*2), fout);

				// frame now has usable audio data in it. How it's stored in the frame depends on the format of
				// the audio. If it's packed audio, all the data will be in frame->data[0]. If it's in planar format,
				// the data will be in frame->data and possibly frame->extended_data. Look at frame->data, frame->nb_samples,
				// frame->linesize, and other related fields on the FFmpeg docs. I don't know how you're actually using
				// the audio data, so I won't add any junk here that might confuse you. Typically, if I want to find
				// documentation on an FFmpeg structure or function, I just type "<name> doxygen" into google (like
				// "AVFrame doxygen" for AVFrame's docs)
			}
			*/
		}

		// You *must* call av_free_packet() after each call to av_read_frame() or else you'll leak memory
		av_free_packet(&packet);
	}

	// Some codecs will cause frames to be buffered up in the decoding process. If the CODEC_CAP_DELAY flag
	// is set, there can be buffered up frames that need to be flushed, so we'll do that
	if (codecContext->codec->capabilities & CODEC_CAP_DELAY)
	{
		av_init_packet(&packet);
		// Decode all the remaining frames in the buffer, until the end is reached
		int frameFinished = 0;
		while (avcodec_decode_audio4(codecContext, frame, &frameFinished, &packet) >= 0 && frameFinished)
		{
		}
	}

	// Clean up!
	av_free(frame);
	avcodec_close(codecContext);
	av_close_input_file(formatContext);
	fclose(fout);
}

When the file 02.mp3 is converted to 8000 Hz mono PCM, the result is okay.

See file voice_01_sinus_8000_mono.raw.

Conversion to mono works correctly at any sample rate.

Conversion to stereo gives bad results at any sample rate.
When converting to 8000 Hz stereo PCM, the output is wrong.

See file voice_01_sinus_8000_stereo.raw.

When converting to 44100 Hz stereo PCM, the output is also incorrect.

See file voice_01_sinus_44100_stereo.raw. The shape of the sine wave is distorted.

Attachments (7)

02.mp3 (623.9 KB ) - added by Oleg 10 years ago.
voice_01_sinus_8000_mono.raw (624.5 KB ) - added by Oleg 10 years ago.
voice_01_sinus_8000_stereo.raw (624.5 KB ) - added by Oleg 10 years ago.
voice_01_sinus_44100_stereo.raw (867.7 KB ) - added by Oleg 10 years ago.
voice_01_sinus_8000_mono.JPG (98.4 KB ) - added by Oleg 10 years ago.
voice_01_sinus_8000_stereo.JPG (106.7 KB ) - added by Oleg 10 years ago.
voice_01_sinus_44100_stereo.JPG (61.7 KB ) - added by Oleg 10 years ago.

Change History (8)

by Oleg, 10 years ago

Attachment:	02.mp3 added

by Oleg, 10 years ago

Attachment:	voice_01_sinus_8000_mono.raw added

by Oleg, 10 years ago

Attachment:	voice_01_sinus_8000_stereo.raw added

by Oleg, 10 years ago

Attachment:	voice_01_sinus_44100_stereo.raw added

by Oleg, 10 years ago

Attachment:	voice_01_sinus_8000_mono.JPG added

by Oleg, 10 years ago

Attachment:	voice_01_sinus_8000_stereo.JPG added

by Oleg, 10 years ago

Attachment:	voice_01_sinus_44100_stereo.JPG added

comment:1 by Carl Eugen Hoyos, 10 years ago

Resolution:	→ duplicate
Status:	new → closed

Note: See TracTickets for help on using tickets.

Download in other formats: