// File: qt_rtmp_demo/media/audiocaptureff.cpp
// (410 lines, 14 KiB, C++; revision dated 2023-11-12 16:13:24 +00:00)
#include "audiocaptureff.h"
#ifdef __MINGW32__
/**
 * Convert a wide string to a multibyte string using the environment's
 * default locale.
 *
 * The process locale is switched to the environment default ("") for the
 * duration of the conversion and restored to its previous value afterwards.
 * Conversion stops at the first NUL wide character in ws.
 *
 * @param ws  Wide string to convert.
 * @return    The multibyte form of ws, or an empty string when ws is empty or
 *            contains a character that wcstombs cannot convert.
 */
std::string WString2String(const std::wstring& ws)
{
    // setlocale(LC_ALL, "") returns the *new* locale name, so the previous
    // locale has to be queried first. It is copied because the buffer
    // returned by setlocale may be overwritten by the next call.
    const char *activeLocale = setlocale(LC_ALL, NULL);
    const std::string previousLocale = activeLocale ? activeLocale : "";
    setlocale(LC_ALL, "");

    std::string result;
    const wchar_t *src = ws.c_str();

    // First pass only measures; (size_t)-1 signals an unconvertible character.
    const size_t needed = wcstombs(NULL, src, 0);
    if (needed != static_cast<size_t>(-1) && needed > 0) {
        result.resize(needed);
        // Exactly `needed` bytes are requested, so wcstombs writes the
        // payload without a terminator and never touches result[size()].
        const size_t written = wcstombs(&result[0], src, needed);
        if (written == static_cast<size_t>(-1))
            result.clear();
        else
            result.resize(written);
    }

    if (!previousLocale.empty())
        setlocale(LC_ALL, previousLocale.c_str());
    return result;
}
#endif
/**
 * Enumerate the DirectShow devices registered under
 * CLSID_AudioInputDeviceCategory.
 *
 * For every device the "Description" property is read, falling back to
 * "FriendlyName"; devices for which neither yields a string are skipped.
 *
 * NOTE(review): this function calls CoCreateInstance but does not initialize
 * COM itself, so the caller is expected to have done that on this thread.
 *
 * @return One MICInfo per device whose name could be read; empty when the
 *         enumerator cannot be created or the category has no devices.
 */
vector<CaptureAudioFfmpeg::MICInfo> CaptureAudioFfmpeg::EnumSpeakers()
{
    vector<CaptureAudioFfmpeg::MICInfo> ret;

    // Create the System Device Enumerator.
    ICreateDevEnum *pDevEnum = nullptr;
    HRESULT hr = CoCreateInstance(CLSID_SystemDeviceEnum, nullptr,
                                  CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&pDevEnum));
    if (FAILED(hr))
        return ret;

    // Create an enumerator for the category.
    IEnumMoniker *pEnum = nullptr;
    hr = pDevEnum->CreateClassEnumerator(CLSID_AudioInputDeviceCategory, &pEnum, 0);
    pDevEnum->Release();

    // S_FALSE means the category is empty (no enumerator is handed back);
    // treat it like an error and report no devices.
    if (FAILED(hr) || hr == S_FALSE || !pEnum)
        return ret;

    IMoniker *pMoniker = nullptr;
    while (pEnum->Next(1, &pMoniker, nullptr) == S_OK)
    {
        IPropertyBag *pPropBag = nullptr;
        if (SUCCEEDED(pMoniker->BindToStorage(0, 0, IID_PPV_ARGS(&pPropBag))))
        {
            VARIANT var;
            VariantInit(&var);

            // Get description or friendly name.
            HRESULT readResult = pPropBag->Read(L"Description", &var, 0);
            if (FAILED(readResult))
            {
                readResult = pPropBag->Read(L"FriendlyName", &var, 0);
            }

            // Only touch bstrVal when the variant really carries a string.
            if (SUCCEEDED(readResult) && var.vt == VT_BSTR && var.bstrVal)
            {
                CaptureAudioFfmpeg::MICInfo ele;
                ele.name = var.bstrVal;
                ret.push_back(ele);
            }

            VariantClear(&var);
            pPropBag->Release();
        }
        pMoniker->Release();
    }
    pEnum->Release();
    return ret;
}
/**
 * Construct a capturer, recording the requested sample rate and channel count.
 *
 * @param rate     Value stored in mSampleRate.
 * @param channel  Value stored in mChanel.
 */
CaptureAudioFfmpeg::CaptureAudioFfmpeg(uint16_t rate, uint8_t channel)
    : mSampleRate(rate),
      mChanel(channel)
{
}
/**
 * Duplicate a NUL-terminated wide string as UTF-8.
 *
 * @param w  Wide string to convert.
 * @return   Buffer obtained from av_malloc holding the converted text, or
 *           NULL when the allocation fails. The caller owns the buffer.
 */
static char *dup_wchar_to_utf8(wchar_t *w)
{
    // Passing no output buffer makes the call report the required byte count
    // (terminator included, because the input length is given as -1).
    const int utf8Bytes = WideCharToMultiByte(CP_UTF8, 0, w, -1, nullptr, 0, nullptr, nullptr);

    char *utf8 = static_cast<char *>(av_malloc(utf8Bytes));
    if (!utf8)
        return nullptr;

    WideCharToMultiByte(CP_UTF8, 0, w, -1, utf8, utf8Bytes, nullptr, nullptr);
    return utf8;
}
/**
 * Capture audio from a DirectShow device and encode it as AAC into the file
 * "aac.aac".
 *
 * Steps: open the device through the "dshow" input format, open the decoder
 * of stream 0, build the filter graph (initAudioFilters), open an AAC encoder
 * and an output context for "aac.aac", then read / decode / filter / encode /
 * write packets until av_read_frame reports an error or end of stream.
 * Afterwards the encoder is drained, the trailer is written and every
 * resource opened here is released. The call blocks for the whole capture.
 *
 * @param url      Device URL; converted to UTF-8 and handed to
 *                 avformat_open_input.
 * @param rate     Currently unused: the encoder is fixed at 48000 Hz.
 * @param channel  Currently unused: the encoder is fixed at 2 channels.
 * @return 0 after the capture loop ended and the trailer was written,
 *         -1 when any setup step fails.
 */
int CaptureAudioFfmpeg::InitCapture(wstring url, uint16_t rate, uint8_t channel)
{
    // NOTE(review): the encoder below is hard-coded to 48000 Hz stereo while
    // the filter graph outputs the decoder's own rate/layout; wiring `rate`
    // and `channel` through needs a decision on resampling first.
    (void)rate;
    (void)channel;

    // dup_wchar_to_utf8 returns an av_malloc'ed buffer (or NULL): copy it
    // into a std::string and free it straight away so it cannot leak.
    char *utf8Url = dup_wchar_to_utf8(const_cast<wchar_t *>(url.c_str()));
    if (!utf8Url) {
        printf("failed input file\n");
        return -1;
    }
    const string fileAudioInput = utf8Url;
    av_free(utf8Url);

    // ---- Open the input device ----
    AVInputFormat* imft = av_find_input_format("dshow");
    AVDictionary *format_opts = nullptr;
    av_dict_set_int(&format_opts, "audio_buffer_size", 20, 0);
    const int openResult = avformat_open_input(&mInfmt_ctx, fileAudioInput.c_str(), imft, &format_opts);
    // Options that were not consumed stay owned by the caller.
    av_dict_free(&format_opts);
    if (0 > openResult) {
        printf("failed input file\n");
        return -1;
    }

    // Everything acquired from here on is released through releaseAll(), so
    // each failure path below cleans up the same way.
    AVCodecContext* codec_ctx = NULL;
    AVFormatContext* outfmt_ctx = NULL;
    AVFrame* pSrcAudioFrame = NULL;
    auto releaseAll = [&]() {
        av_frame_free(&pSrcAudioFrame);
        if (outfmt_ctx) {
            if (outfmt_ctx->pb) {
                avio_close(outfmt_ctx->pb);
                outfmt_ctx->pb = NULL;
            }
            // An output context is released with avformat_free_context;
            // avformat_close_input is meant for demuxer contexts only.
            avformat_free_context(outfmt_ctx);
            outfmt_ctx = NULL;
        }
        avcodec_free_context(&codec_ctx);
        avformat_close_input(&mInfmt_ctx);
    };

    if (0 > avformat_find_stream_info(mInfmt_ctx, NULL)) {
        printf("failed find stream info\n");
        releaseAll();
        return -1;
    }

    // av_find_best_stream reports failure with negative AVERROR codes, which
    // are not necessarily -1.
    const int audio_index = av_find_best_stream(mInfmt_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);
    if (audio_index < 0) {
        printf("failed find best stream\n");
        releaseAll();
        return -1;
    }
    //av_dump_format(infmt_ctx, 0, fileAudioInput.c_str(), 1);

    // ---- Open the decoder ----
    // Stream 0 is used here because initAudioFilters() and DecodeAudio() also
    // operate on stream 0; all three must agree on the decoder context.
    AVCodecContext* decoderCtx = mInfmt_ctx->streams[0]->codec;
    AVCodec* decodec = avcodec_find_decoder(decoderCtx->codec_id);
    if (!decodec) {
        printf("failed find decoder\n");
        releaseAll();
        return -1;
    }
    if (0 > avcodec_open2(decoderCtx, decodec, NULL)) {
        printf("failed open decoder\n");
        releaseAll();
        return -1;
    }

    // ---- Build the audio filter graph ----
    if (0 > initAudioFilters()) {
        printf("failed init audio filters\n");
        releaseAll();
        return -1;
    }

    // ---- Open the AAC encoder ----
    //codec = avcodec_find_encoder_by_name("libmp3lame");
    AVCodec* codec = avcodec_find_encoder(AV_CODEC_ID_AAC);
    codec_ctx = codec ? avcodec_alloc_context3(codec) : NULL;
    if (!codec_ctx) {
        printf("failed alloc encoder context\n");
        releaseAll();
        return -1;
    }
    codec_ctx->sample_rate = 48000;
    codec_ctx->channel_layout = 3;   // front-left | front-right (stereo)
    codec_ctx->channels = 2;
    codec_ctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
    codec_ctx->codec_tag = 0;
    codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
    if (0 > avcodec_open2(codec_ctx, codec, NULL)) {
        printf("failed open coder\n");
        releaseAll();
        return -1;
    }

    // ---- Open the output file ----
    if (0 > avformat_alloc_output_context2(&outfmt_ctx, NULL, NULL, "aac.aac")) {
        printf("failed alloc outputcontext\n");
        releaseAll();
        return -1;
    }
    AVStream* out_stream = avformat_new_stream(outfmt_ctx, codec_ctx->codec);
    if (!out_stream) {
        printf("failed new stream\n");
        releaseAll();
        return -1;
    }
    // The stream's codec context receives a copy of the encoder parameters;
    // the copy itself is never opened, encoding always goes through codec_ctx.
    avcodec_copy_context(out_stream->codec, codec_ctx);
    // if (0 > avio_open(&outfmt_ctx->pb, "rtmp://localhost/testlive", AVIO_FLAG_WRITE)) {
    if (0 > avio_open(&outfmt_ctx->pb, "aac.aac", AVIO_FLAG_WRITE)) {
        printf("failed to open outfile\n");
        releaseAll();
        return -1;
    }
    if (0 > avformat_write_header(outfmt_ctx, NULL)) {
        printf("failed write header\n");
        releaseAll();
        return -1;
    }

    pSrcAudioFrame = av_frame_alloc();
    if (!pSrcAudioFrame) {
        printf("failed alloc frame\n");
        releaseAll();
        return -1;
    }

    int loop = 1;
    int delayedFrame = 0;
    int got_frame = 0;
    AVPacket packet;
    av_init_packet(&packet);
    packet.data = NULL;
    packet.size = 0;
    AVPacket pkt;
    av_init_packet(&pkt);
    pkt.data = NULL;
    pkt.size = 0;

    // Stamp the encoded packet held in `pkt`, write it and release it.
    auto writeEncodedPacket = [&]() {
        auto streamTimeBase = outfmt_ctx->streams[pkt.stream_index]->time_base.den;
        auto codecTimeBase = outfmt_ctx->streams[pkt.stream_index]->codec->time_base.den;
        // Widen before multiplying: 1024 * den * count leaves the int range
        // after only a few packets for common time bases.
        pkt.pts = pkt.dts = (static_cast<int64_t>(1024) * streamTimeBase * mAudioCount) / codecTimeBase;
        mAudioCount++;
        auto inputStream = mInfmt_ctx->streams[pkt.stream_index];
        auto outputStream = outfmt_ctx->streams[pkt.stream_index];
        // NOTE(review): pts was just computed in the output stream's units and
        // is rescaled again from the input time base here; kept as in the
        // original, but worth confirming this double conversion is intended.
        av_packet_rescale_ts(&pkt, inputStream->time_base, outputStream->time_base);
        // pkt.stream_index = out_stream->index;
        av_interleaved_write_frame(outfmt_ctx, &pkt);
        av_packet_unref(&pkt);
    };

    // ---- Capture loop: stop as soon as the device stops delivering ----
    while (av_read_frame(mInfmt_ctx, &packet) >= 0) {
        loop++;
        if (packet.stream_index == audio_index) {
            AVFrame* filterFrame = DecodeAudio(&packet, pSrcAudioFrame);
            if (filterFrame) {
                const int encodeResult = avcodec_encode_audio2(codec_ctx, &pkt, filterFrame, &got_frame);
                // DecodeAudio allocates a fresh frame per call and the encoder
                // does not take ownership of it.
                av_frame_free(&filterFrame);
                if (encodeResult >= 0 && got_frame) {
                    writeEncodedPacket();
                    printf("output frame %3d\n", loop - delayedFrame);
                }
                else {
                    delayedFrame++;
                    av_packet_unref(&pkt);
                    printf("no output frame\n");
                }
            }
        }
        av_packet_unref(&packet);
    }

    // ---- Drain the encoder ----
    // This is done on codec_ctx directly: flush_encoder() works on the
    // stream's codec context, which here is only an unopened copy.
    while (true) {
        got_frame = 0;
        if (avcodec_encode_audio2(codec_ctx, &pkt, NULL, &got_frame) < 0 || !got_frame) {
            av_packet_unref(&pkt);
            break;
        }
        writeEncodedPacket();
    }

    av_write_trailer(outfmt_ctx);
    releaseAll();
    return 0;
}
int CaptureAudioFfmpeg::initAudioFilters()
{
char args[512];
int ret;
AVFilter *abuffersrc = (AVFilter *)avfilter_get_by_name("abuffer");
AVFilter *abuffersink = (AVFilter *)avfilter_get_by_name("abuffersink");
AVFilterInOut *outputs = avfilter_inout_alloc();
AVFilterInOut *inputs = avfilter_inout_alloc();
auto audioDecoderContext = mInfmt_ctx->streams[0]->codec;
if (!audioDecoderContext->channel_layout)
audioDecoderContext->channel_layout = av_get_default_channel_layout(audioDecoderContext->channels);
static const enum AVSampleFormat out_sample_fmts[] = { AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE };
static const uint64_t out_channel_layouts[] = { audioDecoderContext->channel_layout};
static const int out_sample_rates[] = { audioDecoderContext->sample_rate , -1 };
AVRational time_base = mInfmt_ctx->streams[0]->time_base;
mFilterGraph = avfilter_graph_alloc();
mFilterGraph->nb_threads = 1;
sprintf_s(args, sizeof(args),
"time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%I64x",
time_base.num, time_base.den, audioDecoderContext->sample_rate,
av_get_sample_fmt_name(audioDecoderContext->sample_fmt),
audioDecoderContext->channel_layout);
ret = avfilter_graph_create_filter(&mBuffersrcCtx, abuffersrc, "in",
args, NULL, mFilterGraph);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot create audio buffer source\n");
return ret;
}
/* buffer audio sink: to terminate the filter chain. */
ret = avfilter_graph_create_filter(&mBuffersinkCtx, abuffersink, "out",
NULL, NULL, mFilterGraph);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot create audio buffer sink\n");
return ret;
}
ret = av_opt_set_int_list(mBuffersinkCtx, "sample_fmts", out_sample_fmts, -1,
AV_OPT_SEARCH_CHILDREN);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot set output sample format\n");
return ret;
}
ret = av_opt_set_int_list(mBuffersinkCtx, "channel_layouts", out_channel_layouts, -1,
AV_OPT_SEARCH_CHILDREN);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot set output channel layout\n");
return ret;
}
ret = av_opt_set_int_list(mBuffersinkCtx, "sample_rates", out_sample_rates, -1,
AV_OPT_SEARCH_CHILDREN);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot set output sample rate\n");
return ret;
}
/* Endpoints for the filter graph. */
outputs->name = av_strdup("in");
outputs->filter_ctx = mBuffersrcCtx;;
outputs->pad_idx = 0;
outputs->next = NULL;
inputs->name = av_strdup("out");
inputs->filter_ctx = mBuffersinkCtx;
inputs->pad_idx = 0;
inputs->next = NULL;
if ((ret = avfilter_graph_parse_ptr(mFilterGraph, "anull",
&inputs, &outputs, nullptr)) < 0)
return ret;
if ((ret = avfilter_graph_config(mFilterGraph, NULL)) < 0)
return ret;
av_buffersink_set_frame_size(mBuffersinkCtx, 1024);
return 0;
}
/**
 * Drain the packets still buffered in the encoder attached to one stream of
 * an output context and write them to that context.
 *
 * @param fmt_ctx       Output context whose stream is flushed and written to.
 * @param stream_index  Index of the stream in fmt_ctx->streams.
 * @return 0 when there was nothing (more) to flush, a negative AVERROR code
 *         when the arguments are invalid or encoding/writing fails.
 */
int CaptureAudioFfmpeg::flush_encoder(AVFormatContext *fmt_ctx, unsigned int stream_index)
{
    if (!fmt_ctx || stream_index >= fmt_ctx->nb_streams)
        return AVERROR(EINVAL);

    AVCodecContext *enc_ctx = fmt_ctx->streams[stream_index]->codec;
    // A context that has no codec or was never opened holds no buffered data.
    if (!enc_ctx || !enc_ctx->codec || !avcodec_is_open(enc_ctx))
        return 0;
    // Only encoders with the delay capability keep frames back internally.
    if (!(enc_ctx->codec->capabilities & AV_CODEC_CAP_DELAY))
        return 0;

    int ret = 0;
    while (1) {
        AVPacket enc_pkt;
        av_init_packet(&enc_pkt);
        enc_pkt.data = NULL;
        enc_pkt.size = 0;
        int got_frame = 0;

        // A NULL frame asks the encoder for its next buffered packet.
        ret = avcodec_encode_audio2(enc_ctx, &enc_pkt, NULL, &got_frame);
        if (ret < 0) {
            av_packet_unref(&enc_pkt);
            break;
        }
        if (!got_frame) {
            av_packet_unref(&enc_pkt);
            ret = 0;
            break;
        }
        printf("Flush Encoder: Succeed to encode 1 frame!\tsize:%5d\n", enc_pkt.size);

        /* mux encoded frame */
        ret = av_write_frame(fmt_ctx, &enc_pkt);
        // av_write_frame leaves the packet with the caller, so release it.
        av_packet_unref(&enc_pkt);
        if (ret < 0)
            break;
    }
    return ret;
}
/**
 * Decode one input packet with the decoder of stream 0 and pass the decoded
 * frame through the filter graph.
 *
 * @param packet          Packet read from the input context.
 * @param pSrcAudioFrame  Scratch frame that receives the decoded audio before
 *                        it is pushed into the buffer source.
 * @return A newly allocated frame pulled from the buffer sink (the caller is
 *         responsible for freeing it), or nullptr when decoding yields no
 *         frame, pushing into the graph fails, or the sink has no frame ready.
 */
AVFrame *CaptureAudioFfmpeg::DecodeAudio(AVPacket *packet, AVFrame *pSrcAudioFrame)
{
    AVCodecContext* decoder = mInfmt_ctx->streams[0]->codec;

    int frameReady;
    const auto consumed = avcodec_decode_audio4(decoder, pSrcAudioFrame, &frameReady, packet);
    if (consumed < 0 || frameReady == 0)
        return nullptr;

    // Feed the decoded frame into the graph's source end.
    const int pushResult = av_buffersrc_add_frame_flags(mBuffersrcCtx, pSrcAudioFrame, AV_BUFFERSRC_FLAG_PUSH);
    if (pushResult < 0) {
        av_log(NULL, AV_LOG_ERROR, "buffe src add frame error!\n");
        return nullptr;
    }

    // Pull whatever the sink has ready without asking upstream for more.
    AVFrame *filtered = av_frame_alloc();
    if (av_buffersink_get_frame_flags(mBuffersinkCtx, filtered, AV_BUFFERSINK_FLAG_NO_REQUEST) < 0)
    {
        av_frame_free(&filtered);
        return nullptr;
    }
    return filtered;
}