#include "audiocaptureff.h"

#ifdef __MINGW32__
std::string WString2String(const std::wstring& ws)
{
    std::string strLocale = setlocale(LC_ALL, "");
    const wchar_t* wchSrc = ws.c_str();
    size_t nDestSize = wcstombs(NULL, wchSrc, 0) + 1;
    char* chDest = new char[nDestSize];
    memset(chDest, 0, nDestSize);
    wcstombs(chDest, wchSrc, nDestSize);
    std::string strResult = chDest;
    delete[] chDest;
    setlocale(LC_ALL, strLocale.c_str());
    return strResult;
}
#endif

// Enumerate DirectShow audio capture devices (CLSID_AudioInputDeviceCategory).
// Despite the name, this lists audio *input* endpoints usable with FFmpeg's dshow demuxer.
vector<CaptureAudioFfmpeg::MICInfo> CaptureAudioFfmpeg::EnumSpeakers()
{
    vector<CaptureAudioFfmpeg::MICInfo> ret;
    IEnumMoniker *pEnum = nullptr;

    // Create the System Device Enumerator.
    ICreateDevEnum *pDevEnum;
    HRESULT hr = CoCreateInstance(CLSID_SystemDeviceEnum, nullptr,
        CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&pDevEnum));

    if (SUCCEEDED(hr))
    {
        // Create an enumerator for the category.
        hr = pDevEnum->CreateClassEnumerator(CLSID_AudioInputDeviceCategory, &pEnum, 0);
        if (hr == S_FALSE)
        {
            hr = VFW_E_NOT_FOUND; // The category is empty. Treat as an error.
        }
        pDevEnum->Release();
    }

    if (!SUCCEEDED(hr))
        return ret;

    IMoniker *pMoniker = nullptr;
    while (pEnum->Next(1, &pMoniker, nullptr) == S_OK)
    {
        IPropertyBag *pPropBag;
        VARIANT var;
        VariantInit(&var);

        HRESULT hr = pMoniker->BindToStorage(0, 0, IID_PPV_ARGS(&pPropBag));
        if (FAILED(hr))
        {
            pMoniker->Release();
            continue;
        }

        // Get description or friendly name.
        hr = pPropBag->Read(L"Description", &var, 0);
        if (FAILED(hr))
        {
            hr = pPropBag->Read(L"FriendlyName", &var, 0);
        }
        if (SUCCEEDED(hr))
        {
            CaptureAudioFfmpeg::MICInfo ele;
            ele.name = var.bstrVal;
            ret.push_back(ele);
            VariantClear(&var);
        }

        pPropBag->Release();
        pMoniker->Release();
    }

    pEnum->Release();

    return ret;
}
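
// Hypothetical usage sketch (not part of the original file): pick the first
// enumerated capture device and feed it to InitCapture() as an FFmpeg dshow
// input specifier. Assumes MICInfo::name is a std::wstring and that the dshow
// demuxer's "audio=<device name>" syntax is wanted; adjust to the real
// declarations in audiocaptureff.h.
#if 0
int main()
{
    CoInitializeEx(nullptr, COINIT_MULTITHREADED);

    CaptureAudioFfmpeg capture(48000, 2);
    auto devices = capture.EnumSpeakers();
    if (devices.empty())
        return -1;

    std::wstring url = L"audio=" + devices[0].name;   // dshow input specifier
    int ret = capture.InitCapture(url, 48000, 2);

    CoUninitialize();
    return ret;
}
#endif
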
CaptureAudioFfmpeg::CaptureAudioFfmpeg(uint16_t rate, uint8_t channel)
{
    mSampleRate = rate;
    mChanel = channel;
}

// Convert a wide string to UTF-8. The buffer is allocated with av_malloc()
// and must be released by the caller with av_free().
static char *dup_wchar_to_utf8(wchar_t *w)
{
    char *s = NULL;
    int l = WideCharToMultiByte(CP_UTF8, 0, w, -1, 0, 0, 0, 0);
    s = (char *)av_malloc(l);
    if (s)
        WideCharToMultiByte(CP_UTF8, 0, w, -1, s, l, 0, 0);
    return s;
}

int CaptureAudioFfmpeg::InitCapture(wstring url, uint16_t rate, uint8_t channel)
{
    // The dshow demuxer expects a UTF-8 device string such as "audio=<device name>".
    char* utf8Url = dup_wchar_to_utf8((wchar_t *)url.c_str());
    string fileAudioInput = utf8Url ? utf8Url : "";
    av_free(utf8Url);

    AVInputFormat* imft = av_find_input_format("dshow");
    AVDictionary *format_opts = nullptr;
    av_dict_set_int(&format_opts, "audio_buffer_size", 20, 0); // dshow audio buffer, in ms
    if (0 > avformat_open_input(&mInfmt_ctx, fileAudioInput.c_str(), imft, &format_opts)) {
        printf("failed to open input\n");
        return -1;
    }
    if (0 > avformat_find_stream_info(mInfmt_ctx, NULL)) {
        printf("failed to find stream info\n");
        avformat_close_input(&mInfmt_ctx);
        return -1;
    }
    int audio_index = av_find_best_stream(mInfmt_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);
    if (audio_index < 0) {
        printf("failed to find an audio stream\n");
        avformat_close_input(&mInfmt_ctx);
        return -1;
    }
    //av_dump_format(mInfmt_ctx, 0, fileAudioInput.c_str(), 1);
    // END: open input

    // Open the decoder (the code below assumes the audio stream is stream 0).
    static AVCodec* decodec = avcodec_find_decoder(mInfmt_ctx->streams[0]->codec->codec_id);
    if (!decodec) {
        printf("failed to find decoder\n");
        return -1;
    }
    if (0 > avcodec_open2(mInfmt_ctx->streams[0]->codec, decodec, NULL)) {
        printf("failed to open decoder\n");
        return -1;
    }
    // END: decoder

    // Initialize the resampling / filter graph.
    initAudioFilters();
    // END: filter initialization

    // Set up the AAC encoder.
    static AVCodec* codec = NULL;
    //codec = avcodec_find_encoder_by_name("libmp3lame");
    codec = avcodec_find_encoder(AV_CODEC_ID_AAC);
    static AVCodecContext* codec_ctx = NULL;
    codec_ctx = avcodec_alloc_context3(codec);
    // codec_ctx->bit_rate = 64000;
    codec_ctx->codec = codec;
    codec_ctx->sample_rate = 48000;
    codec_ctx->channel_layout = AV_CH_LAYOUT_STEREO;
    codec_ctx->channels = 2;
    //codec_ctx->frame_size = 1024;
    codec_ctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
    codec_ctx->codec_tag = 0;
    codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;

    if (0 > avcodec_open2(codec_ctx, codec, NULL)) {
        printf("failed to open encoder\n");
        avformat_close_input(&mInfmt_ctx);
        avcodec_free_context(&codec_ctx);
        return -1;
    }
    // END: encoder

    // Open the output file.
    AVFormatContext* outfmt_ctx = NULL;
    if (0 > avformat_alloc_output_context2(&outfmt_ctx, NULL, NULL, "aac.aac")) {
        printf("failed to allocate output context\n");
        avformat_close_input(&mInfmt_ctx);
        avcodec_free_context(&codec_ctx);
        return -1;
    }
    AVStream* out_stream = avformat_new_stream(outfmt_ctx, codec_ctx->codec);
    if (!out_stream) {
        printf("failed to create output stream\n");
        avformat_close_input(&mInfmt_ctx);
        avcodec_free_context(&codec_ctx);
        avformat_close_input(&outfmt_ctx);
        return -1;
    }
    avcodec_copy_context(out_stream->codec, codec_ctx);
    // if (0 > avio_open(&outfmt_ctx->pb, "rtmp://localhost/testlive", AVIO_FLAG_WRITE)) {
    if (0 > avio_open(&outfmt_ctx->pb, "aac.aac", AVIO_FLAG_WRITE)) {
        printf("failed to open output file\n");
        avformat_close_input(&mInfmt_ctx);
        avcodec_free_context(&codec_ctx);
        avformat_close_input(&outfmt_ctx);
        return -1;
    }
    avformat_write_header(outfmt_ctx, NULL);
    // END: output file

#if 0
    AVFrame* Frame = av_frame_alloc();
    Frame->nb_samples = codec_ctx->frame_size;
    Frame->format = codec_ctx->sample_fmt;
    Frame->channel_layout = codec_ctx->channel_layout;
    int size = av_samples_get_buffer_size(NULL, codec_ctx->channels, codec_ctx->frame_size,
        codec_ctx->sample_fmt, 1);
    uint8_t* frame_buf = (uint8_t *)av_malloc(size);
    avcodec_fill_audio_frame(Frame, codec_ctx->channels, codec_ctx->sample_fmt, (const uint8_t*)frame_buf, size, 1);
    int64_t in_channel_layout = av_get_default_channel_layout(codec_ctx->channels);
    AVPacket pkt;
    av_new_packet(&pkt, size);
    pkt.data = NULL;
    int got_frame = -1;
    int delayedFrame = 0;
    static uint8_t audio_buf[(MAX_AUDIO_FRAME_SIZE * 3) / 2];
    int audioCount = 0;
    const uint8_t *indata[AV_NUM_DATA_POINTERS] = { 0 };
    AVFrame* Frame1 = av_frame_alloc();
#endif
    int loop = 1;
    int delayedFrame = 0;
    AVPacket packet;
    av_init_packet(&packet);
    packet.data = NULL;
    packet.size = 0;
    AVPacket pkt;
    av_init_packet(&pkt);
    pkt.data = NULL;
    pkt.size = 0;

    AVFrame* pSrcAudioFrame = av_frame_alloc();
    int got_frame = 0;

    // Capture loop: read from the device, decode + filter, encode to AAC and mux.
    while (1) {
        if (av_read_frame(mInfmt_ctx, &packet) < 0)
            break;
        loop++;
        if (packet.stream_index == audio_index) {
            auto filterFrame = DecodeAudio(&packet, pSrcAudioFrame);
            if (filterFrame) {
                avcodec_encode_audio2(codec_ctx, &pkt, filterFrame, &got_frame);
                av_frame_free(&filterFrame);
                if (got_frame) {
#if 1
                    auto streamTimeBase = outfmt_ctx->streams[pkt.stream_index]->time_base.den;
                    auto codecTimeBase = outfmt_ctx->streams[pkt.stream_index]->codec->time_base.den;
                    pkt.pts = pkt.dts = (1024 * streamTimeBase * mAudioCount) / codecTimeBase;
                    mAudioCount++;
                    auto inputStream = mInfmt_ctx->streams[pkt.stream_index];
                    auto outputStream = outfmt_ctx->streams[pkt.stream_index];
                    av_packet_rescale_ts(&pkt, inputStream->time_base, outputStream->time_base);
#endif
                    // pkt.stream_index = out_stream->index;
                    av_interleaved_write_frame(outfmt_ctx, &pkt);
                    av_packet_unref(&pkt);
                    printf("output frame %3d\n", loop - delayedFrame);
                }
                else {
                    delayedFrame++;
                    av_packet_unref(&pkt);
                    printf("no output frame\n");
                }
            }
        }
        av_packet_unref(&packet);
    }
    flush_encoder(outfmt_ctx, 0);
    av_write_trailer(outfmt_ctx);
    //av_free(Frame);
    av_frame_free(&pSrcAudioFrame);
    avio_close(outfmt_ctx->pb);
    avformat_close_input(&mInfmt_ctx);
    avformat_free_context(outfmt_ctx);

    return 0;
}
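
// Sketch (not part of the original code): the avcodec_encode_audio2() call in
// the loop above is deprecated. Assuming FFmpeg >= 3.1, the send/receive API
// could be used instead, roughly as below; "encode_and_write" is a hypothetical
// helper name, and timestamp handling is left to the caller as in the original.
#if 0
static int encode_and_write(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx,
                            AVFrame *frame)
{
    // Passing frame == NULL flushes the encoder.
    int ret = avcodec_send_frame(enc_ctx, frame);
    if (ret < 0)
        return ret;

    while (ret >= 0) {
        AVPacket pkt;
        av_init_packet(&pkt);
        pkt.data = NULL;
        pkt.size = 0;

        ret = avcodec_receive_packet(enc_ctx, &pkt);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
            return 0;            // need more input, or fully flushed
        if (ret < 0)
            return ret;          // real encoding error

        ret = av_interleaved_write_frame(ofmt_ctx, &pkt);
        av_packet_unref(&pkt);
    }
    return ret;
}
#endif
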
// Build a minimal audio filter graph (abuffer -> anull -> abuffersink) that is
// fed with decoded frames and hands back frames in a format the AAC encoder
// accepts (FLTP, fixed 1024-sample frames).
int CaptureAudioFfmpeg::initAudioFilters()
{
    char args[512];
    int ret;
    AVFilter *abuffersrc = (AVFilter *)avfilter_get_by_name("abuffer");
    AVFilter *abuffersink = (AVFilter *)avfilter_get_by_name("abuffersink");
    AVFilterInOut *outputs = avfilter_inout_alloc();
    AVFilterInOut *inputs = avfilter_inout_alloc();

    auto audioDecoderContext = mInfmt_ctx->streams[0]->codec;
    if (!audioDecoderContext->channel_layout)
        audioDecoderContext->channel_layout = av_get_default_channel_layout(audioDecoderContext->channels);

    // Output constraints for the sink; each list must be terminated with -1 /
    // AV_SAMPLE_FMT_NONE as required by av_opt_set_int_list().
    const enum AVSampleFormat out_sample_fmts[] = { AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE };
    const int64_t out_channel_layouts[] = { (int64_t)audioDecoderContext->channel_layout, -1 };
    const int out_sample_rates[] = { audioDecoderContext->sample_rate, -1 };

    AVRational time_base = mInfmt_ctx->streams[0]->time_base;
    mFilterGraph = avfilter_graph_alloc();
    mFilterGraph->nb_threads = 1;

    sprintf_s(args, sizeof(args),
        "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%I64x",
        time_base.num, time_base.den, audioDecoderContext->sample_rate,
        av_get_sample_fmt_name(audioDecoderContext->sample_fmt),
        audioDecoderContext->channel_layout);

    ret = avfilter_graph_create_filter(&mBuffersrcCtx, abuffersrc, "in",
        args, NULL, mFilterGraph);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot create audio buffer source\n");
        return ret;
    }

    /* buffer audio sink: to terminate the filter chain. */
    ret = avfilter_graph_create_filter(&mBuffersinkCtx, abuffersink, "out",
        NULL, NULL, mFilterGraph);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot create audio buffer sink\n");
        return ret;
    }

    ret = av_opt_set_int_list(mBuffersinkCtx, "sample_fmts", out_sample_fmts, -1,
        AV_OPT_SEARCH_CHILDREN);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot set output sample format\n");
        return ret;
    }

    ret = av_opt_set_int_list(mBuffersinkCtx, "channel_layouts", out_channel_layouts, -1,
        AV_OPT_SEARCH_CHILDREN);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot set output channel layout\n");
        return ret;
    }

    ret = av_opt_set_int_list(mBuffersinkCtx, "sample_rates", out_sample_rates, -1,
        AV_OPT_SEARCH_CHILDREN);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot set output sample rate\n");
        return ret;
    }

    /* Endpoints for the filter graph. */
    outputs->name = av_strdup("in");
    outputs->filter_ctx = mBuffersrcCtx;
    outputs->pad_idx = 0;
    outputs->next = NULL;

    inputs->name = av_strdup("out");
    inputs->filter_ctx = mBuffersinkCtx;
    inputs->pad_idx = 0;
    inputs->next = NULL;

    if ((ret = avfilter_graph_parse_ptr(mFilterGraph, "anull",
        &inputs, &outputs, nullptr)) < 0)
        return ret;

    if ((ret = avfilter_graph_config(mFilterGraph, NULL)) < 0)
        return ret;

    avfilter_inout_free(&inputs);
    avfilter_inout_free(&outputs);

    // Deliver fixed 1024-sample frames to match the AAC encoder's frame size.
    av_buffersink_set_frame_size(mBuffersinkCtx, 1024);
    return 0;
}

// Drain any packets still buffered in the encoder (codecs with
// AV_CODEC_CAP_DELAY, such as AAC) and write them to the output.
int CaptureAudioFfmpeg::flush_encoder(AVFormatContext *fmt_ctx, unsigned int stream_index)
{
    int ret;
    int got_frame;
    AVPacket enc_pkt;
    if (!(fmt_ctx->streams[stream_index]->codec->codec->capabilities &
        AV_CODEC_CAP_DELAY))
        return 0;
    while (1) {
        av_init_packet(&enc_pkt);
        enc_pkt.data = NULL;
        enc_pkt.size = 0;
        ret = avcodec_encode_audio2(fmt_ctx->streams[stream_index]->codec, &enc_pkt,
            NULL, &got_frame);
        if (ret < 0)
            break;
        if (!got_frame) {
            ret = 0;
            break;
        }
        printf("flush encoder: encoded 1 frame, size: %5d\n", enc_pkt.size);
        /* mux encoded frame */
        ret = av_write_frame(fmt_ctx, &enc_pkt);
        if (ret < 0)
            break;
    }
    return ret;
}

// Decode one packet into pSrcAudioFrame, push it through the filter graph and
// return a newly allocated filtered frame. The caller owns the returned frame
// (free it with av_frame_free()); nullptr means no frame is available yet.
AVFrame *CaptureAudioFfmpeg::DecodeAudio(AVPacket *packet, AVFrame *pSrcAudioFrame)
{
    AVStream *stream = mInfmt_ctx->streams[0];
    AVCodecContext *codecContext = stream->codec;
    int gotFrame;
    AVFrame *filtFrame = nullptr;
    auto length = avcodec_decode_audio4(codecContext, pSrcAudioFrame, &gotFrame, packet);
    if (length >= 0 && gotFrame != 0)
    {
        if (av_buffersrc_add_frame_flags(mBuffersrcCtx, pSrcAudioFrame, AV_BUFFERSRC_FLAG_PUSH) < 0) {
            av_log(NULL, AV_LOG_ERROR, "buffersrc add frame error!\n");
            return nullptr;
        }

        filtFrame = av_frame_alloc();
        int ret = av_buffersink_get_frame_flags(mBuffersinkCtx, filtFrame, AV_BUFFERSINK_FLAG_NO_REQUEST);
        if (ret < 0)
        {
            av_frame_free(&filtFrame);
            goto error;
        }
        return filtFrame;
    }
error:
    return nullptr;
}
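
// Sketch (not part of the original code): avcodec_decode_audio4() above is
// deprecated. Assuming FFmpeg >= 3.1, DecodeAudio() could be rewritten around
// the send/receive API, roughly as below; the control flow is simplified and
// the filter-graph push/pull is kept as in the original.
#if 0
AVFrame *CaptureAudioFfmpeg::DecodeAudio(AVPacket *packet, AVFrame *pSrcAudioFrame)
{
    AVCodecContext *codecContext = mInfmt_ctx->streams[0]->codec;

    if (avcodec_send_packet(codecContext, packet) < 0)
        return nullptr;
    if (avcodec_receive_frame(codecContext, pSrcAudioFrame) < 0)
        return nullptr;                       // EAGAIN/EOF or error: no frame yet

    if (av_buffersrc_add_frame_flags(mBuffersrcCtx, pSrcAudioFrame,
                                     AV_BUFFERSRC_FLAG_PUSH) < 0)
        return nullptr;

    AVFrame *filtFrame = av_frame_alloc();
    if (av_buffersink_get_frame_flags(mBuffersinkCtx, filtFrame,
                                      AV_BUFFERSINK_FLAG_NO_REQUEST) < 0) {
        av_frame_free(&filtFrame);
        return nullptr;
    }
    return filtFrame;
}
#endif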