qt_rtmp_demo/media/audiocaptureff.cpp

410 lines
14 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#include "audiocaptureff.h"
#ifdef __MINGW32__
/**
 * Converts a wide string to a multibyte string using the user's default
 * locale (the one selected by setlocale(LC_ALL, "")).
 *
 * The locale that was active on entry is put back before returning.
 *
 * @param ws  Wide string to convert; conversion stops at the first L'\0'.
 * @return    The converted string, or an empty string when `ws` is empty or
 *            contains a character the locale cannot represent.
 */
std::string WString2String(const std::wstring& ws)
{
    // Query (NULL argument) rather than set: setlocale(LC_ALL, "") would hand
    // back the NEW locale name, which is useless for restoring the old one.
    // The returned buffer may be overwritten by the next setlocale call, so
    // copy it right away.
    const char *activeLocale = setlocale(LC_ALL, NULL);
    const bool canRestore = (activeLocale != NULL);
    const std::string savedLocale = canRestore ? activeLocale : "";

    setlocale(LC_ALL, "");

    std::string converted;
    const wchar_t *wideSrc = ws.c_str();
    // With a NULL destination wcstombs only reports the byte count
    // (terminator excluded), or (size_t)-1 for an unconvertible character.
    const size_t byteCount = wcstombs(NULL, wideSrc, 0);
    if (byteCount != static_cast<size_t>(-1) && byteCount > 0) {
        converted.resize(byteCount);
        const size_t written = wcstombs(&converted[0], wideSrc, byteCount);
        if (written == static_cast<size_t>(-1))
            converted.clear();
        else
            converted.resize(written);
    }

    if (canRestore)
        setlocale(LC_ALL, savedLocale.c_str());
    return converted;
}
#endif
/**
 * Lists the devices registered in the DirectShow audio *input* category
 * (CLSID_AudioInputDeviceCategory).
 *
 * For each device the "Description" property is used as the name, falling
 * back to "FriendlyName" when "Description" cannot be read.
 *
 * NOTE(review): despite the name, the category queried here is the audio
 * input one — confirm the intended naming with callers.
 *
 * @return One MICInfo per device whose name could be read; empty when the
 *         enumerator cannot be created or the category has no devices.
 */
vector<CaptureAudioFfmpeg::MICInfo> CaptureAudioFfmpeg::EnumSpeakers()
{
    vector<CaptureAudioFfmpeg::MICInfo> ret;

    // Create the System Device Enumerator.
    ICreateDevEnum *pDevEnum = nullptr;
    HRESULT hr = CoCreateInstance(CLSID_SystemDeviceEnum, nullptr,
                                  CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&pDevEnum));
    if (FAILED(hr))
        return ret;

    // Create an enumerator for the category.
    IEnumMoniker *pEnum = nullptr;
    hr = pDevEnum->CreateClassEnumerator(CLSID_AudioInputDeviceCategory, &pEnum, 0);
    pDevEnum->Release();
    // S_FALSE means the category is empty; no enumerator is handed back then,
    // so anything other than S_OK is treated as "no devices".
    if (hr != S_OK || !pEnum)
        return ret;

    IMoniker *pMoniker = nullptr;
    while (pEnum->Next(1, &pMoniker, nullptr) == S_OK)
    {
        IPropertyBag *pPropBag = nullptr;
        if (SUCCEEDED(pMoniker->BindToStorage(0, 0, IID_PPV_ARGS(&pPropBag))))
        {
            VARIANT var;
            VariantInit(&var);
            // Get description or friendly name.
            hr = pPropBag->Read(L"Description", &var, 0);
            if (FAILED(hr))
            {
                hr = pPropBag->Read(L"FriendlyName", &var, 0);
            }
            // Only touch bstrVal when the variant really holds a string.
            if (SUCCEEDED(hr) && var.vt == VT_BSTR && var.bstrVal)
            {
                CaptureAudioFfmpeg::MICInfo ele;
                ele.name = var.bstrVal;
                ret.push_back(ele);
            }
            // Safe on an empty variant as well, so clear unconditionally.
            VariantClear(&var);
            pPropBag->Release();
        }
        pMoniker->Release();
    }
    pEnum->Release();
    return ret;
}
/**
 * Stores the requested capture parameters on the instance.
 *
 * @param rate     Value stored in mSampleRate.
 * @param channel  Value stored in mChanel.
 */
CaptureAudioFfmpeg::CaptureAudioFfmpeg(uint16_t rate, uint8_t channel)
{
    this->mChanel = channel;
    this->mSampleRate = rate;
}
/**
 * Returns a UTF-8 copy of the NUL-terminated wide string `w`.
 *
 * The copy is allocated with av_malloc(); the caller is responsible for
 * releasing it.
 *
 * @param w  NUL-terminated wide string (a length of -1 is passed to the
 *           conversion routine, so the terminator is located by the API).
 * @return   The converted buffer, or NULL when the allocation fails.
 */
static char *dup_wchar_to_utf8(wchar_t *w)
{
    // First call has no destination buffer: it only reports the size needed.
    const int bytesNeeded = WideCharToMultiByte(CP_UTF8, 0, w, -1, 0, 0, 0, 0);
    char *utf8 = (char *)av_malloc(bytesNeeded);
    if (!utf8)
        return utf8;
    // Second call performs the actual conversion into the new buffer.
    WideCharToMultiByte(CP_UTF8, 0, w, -1, utf8, bytesNeeded, 0, 0);
    return utf8;
}
/**
 * Captures audio from a DirectShow device and writes it, AAC-encoded, to the
 * file "aac.aac".
 *
 * Steps: open `url` with the "dshow" input format, open the decoder of input
 * stream 0, build the filter graph (initAudioFilters), open an AAC encoder
 * (48000 Hz, stereo, planar float), open the output file, then read, decode,
 * filter, encode and mux packets until reading from the device fails. The
 * encoder is then drained, the trailer written and everything opened here is
 * released.
 *
 * @param url      DirectShow device string (wide); converted to UTF-8.
 * @param rate     Not consulted; the encoder settings are fixed below.
 * @param channel  Not consulted; the encoder settings are fixed below.
 * @return 0 once capture has ended and the file is finalised, -1 when any
 *         set-up step fails.
 *
 * NOTE(review): decoding and filtering always use input stream 0 (here and in
 * DecodeAudio / initAudioFilters) while packets are selected by the index
 * returned from av_find_best_stream — these agree only when the audio stream
 * is stream 0.
 */
int CaptureAudioFfmpeg::InitCapture(wstring url, uint16_t rate, uint8_t channel)
{
    (void)rate;
    (void)channel;

    AVCodecContext *codec_ctx = nullptr;
    AVFormatContext *outfmt_ctx = nullptr;
    AVFrame *pSrcAudioFrame = nullptr;

    // Releases whatever has been acquired so far; every call below tolerates
    // a pointer that is still null.
    auto releaseAll = [&]() {
        av_frame_free(&pSrcAudioFrame);
        if (outfmt_ctx) {
            if (outfmt_ctx->pb) {
                avio_close(outfmt_ctx->pb);
                outfmt_ctx->pb = nullptr;
            }
            // An output context is released with avformat_free_context();
            // avformat_close_input() is meant for demuxer contexts.
            avformat_free_context(outfmt_ctx);
            outfmt_ctx = nullptr;
        }
        avcodec_free_context(&codec_ctx);
        if (mInfmt_ctx)
            avformat_close_input(&mInfmt_ctx);
    };

    // ---- Open the input device ----
    // The helper returns an av_malloc'ed buffer: copy it, then free it.
    char *utf8Url = dup_wchar_to_utf8(const_cast<wchar_t *>(url.c_str()));
    if (!utf8Url) {
        printf("failed input file\n");
        return -1;
    }
    const string fileAudioInput(utf8Url);
    av_free(utf8Url);

    AVInputFormat* imft = av_find_input_format("dshow");
    AVDictionary *format_opts = nullptr;
    av_dict_set_int(&format_opts, "audio_buffer_size", 20, 0);
    const int openRet = avformat_open_input(&mInfmt_ctx, fileAudioInput.c_str(), imft, &format_opts);
    // Entries the demuxer did not consume are left in the dictionary.
    av_dict_free(&format_opts);
    if (0 > openRet) {
        printf("failed input file\n");
        return -1;
    }
    if (0 > avformat_find_stream_info(mInfmt_ctx, NULL)) {
        printf("failed find stream info\n");
        releaseAll();
        return -1;
    }
    // Failure is reported as a negative AVERROR code, not necessarily -1.
    const int audio_index = av_find_best_stream(mInfmt_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);
    if (audio_index < 0) {
        printf("failed find best stream\n");
        releaseAll();
        return -1;
    }
    //av_dump_format(infmt_ctx, 0, fileAudioInput.c_str(), 1);

    // ---- Open the decoder ----
    AVCodec* decodec = avcodec_find_decoder(mInfmt_ctx->streams[0]->codec->codec_id);
    if (!decodec) {
        printf("failed find decoder\n");
        releaseAll();
        return -1;
    }
    if (0 > avcodec_open2(mInfmt_ctx->streams[0]->codec, decodec, NULL)) {
        printf("failed open decoder\n");
        releaseAll();
        return -1;
    }

    // ---- Build the filter graph ----
    if (0 > initAudioFilters()) {
        printf("failed init audio filters\n");
        releaseAll();
        return -1;
    }

    // ---- Open the encoder ----
    //codec = avcodec_find_encoder_by_name("libmp3lame");
    AVCodec* codec = avcodec_find_encoder(AV_CODEC_ID_AAC);
    if (!codec) {
        printf("failed find encoder\n");
        releaseAll();
        return -1;
    }
    codec_ctx = avcodec_alloc_context3(codec);
    if (!codec_ctx) {
        printf("failed alloc encoder context\n");
        releaseAll();
        return -1;
    }
    codec_ctx->codec = codec;
    codec_ctx->sample_rate = 48000;
    codec_ctx->channel_layout = 3; // front-left | front-right
    codec_ctx->channels = 2;
    codec_ctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
    codec_ctx->codec_tag = 0;
    codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
    if (0 > avcodec_open2(codec_ctx, codec, NULL)) {
        printf("failed open coder\n");
        releaseAll();
        return -1;
    }

    // ---- Open the output file ----
    if (0 > avformat_alloc_output_context2(&outfmt_ctx, NULL, NULL, "aac.aac")) {
        printf("failed alloc outputcontext\n");
        releaseAll();
        return -1;
    }
    AVStream* out_stream = avformat_new_stream(outfmt_ctx, codec_ctx->codec);
    if (!out_stream) {
        printf("failed new stream\n");
        releaseAll();
        return -1;
    }
    avcodec_copy_context(out_stream->codec, codec_ctx);
    // if (0 > avio_open(&outfmt_ctx->pb, "rtmp://localhost/testlive", AVIO_FLAG_WRITE)) {
    if (0 > avio_open(&outfmt_ctx->pb, "aac.aac", AVIO_FLAG_WRITE)) {
        printf("failed to open outfile\n");
        releaseAll();
        return -1;
    }
    if (0 > avformat_write_header(outfmt_ctx, NULL)) {
        printf("failed write header\n");
        releaseAll();
        return -1;
    }

    pSrcAudioFrame = av_frame_alloc();
    if (!pSrcAudioFrame) {
        printf("failed alloc frame\n");
        releaseAll();
        return -1;
    }

    int loop = 1;
    int delayedFrame = 0;
    int got_frame = 0;
    AVPacket packet;
    av_init_packet(&packet);
    packet.data = NULL;
    packet.size = 0;
    AVPacket pkt;
    av_init_packet(&pkt);
    pkt.data = NULL;
    pkt.size = 0;

    // Stamps the packet currently held in `pkt`, muxes it and releases it.
    auto writeEncodedPacket = [&]() {
        auto streamTimeBase = outfmt_ctx->streams[pkt.stream_index]->time_base.den;
        auto codecTimeBase = outfmt_ctx->streams[pkt.stream_index]->codec->time_base.den;
        pkt.pts = pkt.dts = (1024 * streamTimeBase * mAudioCount) / codecTimeBase;
        mAudioCount++;
        auto inputStream = mInfmt_ctx->streams[pkt.stream_index];
        auto outputStream = outfmt_ctx->streams[pkt.stream_index];
        // NOTE(review): the value computed above is derived from the output
        // stream's time base, yet it is rescaled from the input stream's time
        // base here — kept as found; confirm this is intended.
        av_packet_rescale_ts(&pkt, inputStream->time_base, outputStream->time_base);
        // pkt.stream_index = out_stream->index;
        av_interleaved_write_frame(outfmt_ctx, &pkt);
        av_packet_unref(&pkt);
    };

    // ---- Capture loop: stop as soon as the device can no longer be read ----
    while (av_read_frame(mInfmt_ctx, &packet) >= 0) {
        loop++;
        if (packet.stream_index == audio_index) {
            AVFrame *filterFrame = DecodeAudio(&packet, pSrcAudioFrame);
            if (filterFrame) {
                avcodec_encode_audio2(codec_ctx, &pkt, filterFrame, &got_frame);
                // DecodeAudio allocates a fresh frame for every call; release
                // it once the encoder has been given the samples.
                av_frame_free(&filterFrame);
                if (got_frame) {
                    writeEncodedPacket();
                    printf("output frame %3d\n", loop - delayedFrame);
                }
                else {
                    delayedFrame++;
                    av_packet_unref(&pkt);
                    printf("no output frame\n");
                }
            }
        }
        av_packet_unref(&packet);
    }

    // ---- Drain the encoder ----
    // Done on codec_ctx directly: the codec context attached to the output
    // stream was filled by avcodec_copy_context(), which leaves it unopened,
    // so it cannot be used for encoding.
    for (;;) {
        if (avcodec_encode_audio2(codec_ctx, &pkt, NULL, &got_frame) < 0 || !got_frame) {
            av_packet_unref(&pkt);
            break;
        }
        writeEncodedPacket();
    }

    av_write_trailer(outfmt_ctx);
    releaseAll();
    return 0;
}
int CaptureAudioFfmpeg::initAudioFilters()
{
char args[512];
int ret;
AVFilter *abuffersrc = (AVFilter *)avfilter_get_by_name("abuffer");
AVFilter *abuffersink = (AVFilter *)avfilter_get_by_name("abuffersink");
AVFilterInOut *outputs = avfilter_inout_alloc();
AVFilterInOut *inputs = avfilter_inout_alloc();
auto audioDecoderContext = mInfmt_ctx->streams[0]->codec;
if (!audioDecoderContext->channel_layout)
audioDecoderContext->channel_layout = av_get_default_channel_layout(audioDecoderContext->channels);
static const enum AVSampleFormat out_sample_fmts[] = { AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE };
static const uint64_t out_channel_layouts[] = { audioDecoderContext->channel_layout};
static const int out_sample_rates[] = { audioDecoderContext->sample_rate , -1 };
AVRational time_base = mInfmt_ctx->streams[0]->time_base;
mFilterGraph = avfilter_graph_alloc();
mFilterGraph->nb_threads = 1;
sprintf_s(args, sizeof(args),
"time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%I64x",
time_base.num, time_base.den, audioDecoderContext->sample_rate,
av_get_sample_fmt_name(audioDecoderContext->sample_fmt),
audioDecoderContext->channel_layout);
ret = avfilter_graph_create_filter(&mBuffersrcCtx, abuffersrc, "in",
args, NULL, mFilterGraph);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot create audio buffer source\n");
return ret;
}
/* buffer audio sink: to terminate the filter chain. */
ret = avfilter_graph_create_filter(&mBuffersinkCtx, abuffersink, "out",
NULL, NULL, mFilterGraph);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot create audio buffer sink\n");
return ret;
}
ret = av_opt_set_int_list(mBuffersinkCtx, "sample_fmts", out_sample_fmts, -1,
AV_OPT_SEARCH_CHILDREN);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot set output sample format\n");
return ret;
}
ret = av_opt_set_int_list(mBuffersinkCtx, "channel_layouts", out_channel_layouts, -1,
AV_OPT_SEARCH_CHILDREN);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot set output channel layout\n");
return ret;
}
ret = av_opt_set_int_list(mBuffersinkCtx, "sample_rates", out_sample_rates, -1,
AV_OPT_SEARCH_CHILDREN);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot set output sample rate\n");
return ret;
}
/* Endpoints for the filter graph. */
outputs->name = av_strdup("in");
outputs->filter_ctx = mBuffersrcCtx;;
outputs->pad_idx = 0;
outputs->next = NULL;
inputs->name = av_strdup("out");
inputs->filter_ctx = mBuffersinkCtx;
inputs->pad_idx = 0;
inputs->next = NULL;
if ((ret = avfilter_graph_parse_ptr(mFilterGraph, "anull",
&inputs, &outputs, nullptr)) < 0)
return ret;
if ((ret = avfilter_graph_config(mFilterGraph, NULL)) < 0)
return ret;
av_buffersink_set_frame_size(mBuffersinkCtx, 1024);
return 0;
}
/**
 * Drains the packets still buffered in the encoder attached to
 * fmt_ctx->streams[stream_index] and writes each one to `fmt_ctx`.
 *
 * Nothing is done when the encoder does not advertise AV_CODEC_CAP_DELAY.
 *
 * @param fmt_ctx       Output context whose stream carries the encoder.
 * @param stream_index  Index of that stream inside `fmt_ctx`.
 * @return 0 when the encoder is drained (or had nothing to drain), a negative
 *         AVERROR code when the arguments are invalid or when encoding or
 *         writing fails.
 */
int CaptureAudioFfmpeg::flush_encoder(AVFormatContext *fmt_ctx, unsigned int stream_index)
{
    if (!fmt_ctx || stream_index >= fmt_ctx->nb_streams)
        return AVERROR(EINVAL);

    AVCodecContext *encoderCtx = fmt_ctx->streams[stream_index]->codec;
    // No encoder attached means there is nothing that could be buffered.
    if (!encoderCtx || !encoderCtx->codec)
        return 0;
    if (!(encoderCtx->codec->capabilities & AV_CODEC_CAP_DELAY))
        return 0;

    int ret = 0;
    while (1) {
        AVPacket enc_pkt;
        int got_frame = 0;
        enc_pkt.data = NULL;
        enc_pkt.size = 0;
        av_init_packet(&enc_pkt);
        // A NULL frame asks the encoder to emit what it is still holding.
        ret = avcodec_encode_audio2(encoderCtx, &enc_pkt, NULL, &got_frame);
        if (ret < 0)
            break;
        if (!got_frame) {
            ret = 0;
            break;
        }
        printf("Flush Encoder: Succeed to encode 1 frame!\tsize:%5d\n", enc_pkt.size);
        /* mux encoded frame */
        ret = av_write_frame(fmt_ctx, &enc_pkt);
        // av_write_frame() leaves the packet with the caller, so release it.
        av_packet_unref(&enc_pkt);
        if (ret < 0)
            break;
    }
    return ret;
}
/**
 * Decodes one packet with the decoder of input stream 0 and passes the
 * decoded frame through the filter graph.
 *
 * @param packet          Packet to decode.
 * @param pSrcAudioFrame  Scratch frame that receives the decoder output
 *                        before it is pushed into the buffer source.
 * @return A newly allocated frame pulled from the buffer sink, or nullptr
 *         when decoding yields no frame, the frame cannot be pushed into the
 *         graph, or the sink has no frame available. A non-null result comes
 *         from av_frame_alloc() and is handed to the caller.
 */
AVFrame *CaptureAudioFfmpeg::DecodeAudio(AVPacket *packet, AVFrame *pSrcAudioFrame)
{
    AVCodecContext *decoderCtx = mInfmt_ctx->streams[0]->codec;

    int frameReady = 0;
    const auto consumed = avcodec_decode_audio4(decoderCtx, pSrcAudioFrame, &frameReady, packet);
    if (consumed < 0 || frameReady == 0)
        return nullptr;

    // Feed the decoded samples into the graph's source.
    if (av_buffersrc_add_frame_flags(mBuffersrcCtx, pSrcAudioFrame, AV_BUFFERSRC_FLAG_PUSH) < 0) {
        av_log(NULL, AV_LOG_ERROR, "buffe src add frame error!\n");
        return nullptr;
    }

    // Pull a filtered frame from the sink without requesting more input.
    AVFrame *filtered = av_frame_alloc();
    if (av_buffersink_get_frame_flags(mBuffersinkCtx, filtered, AV_BUFFERSINK_FLAG_NO_REQUEST) < 0) {
        av_frame_free(&filtered);
        return nullptr;
    }
    return filtered;
}