407 lines
14 KiB
C++
407 lines
14 KiB
C++
#include "audiocaptureff.h"
|
|
|
|
|
|
/**
 * Converts a wide string to a multibyte string using the user's default
 * locale (the one selected by setlocale(LC_ALL, "")).
 *
 * The process-wide locale is switched only for the duration of the conversion;
 * the locale that was active on entry is restored before returning.
 *
 * @param ws  Wide string to convert. Conversion stops at the first L'\0'.
 * @return    The multibyte form of ws, or an empty string when ws is empty or
 *            contains a character the locale cannot represent.
 */
std::string WString2String(const std::wstring& ws)
{
    // Query (without changing) the active locale. The returned buffer may be
    // overwritten by later setlocale calls, so copy it right away.
    const char* active = setlocale(LC_ALL, NULL);
    const bool haveSaved = (active != NULL);
    const std::string savedLocale = haveSaved ? active : "";

    setlocale(LC_ALL, "");

    std::string result;
    const wchar_t* src = ws.c_str();

    // wcstombs reports an unconvertible character as (size_t)-1; adding 1 to
    // that would wrap to 0 and yield an unterminated buffer, so check first.
    const size_t needed = wcstombs(NULL, src, 0);
    if (needed != static_cast<size_t>(-1) && needed > 0)
    {
        std::string buffer(needed + 1, '\0');
        const size_t written = wcstombs(&buffer[0], src, buffer.size());
        if (written != static_cast<size_t>(-1))
        {
            buffer.resize(written);
            result.swap(buffer);
        }
    }

    if (haveSaved)
        setlocale(LC_ALL, savedLocale.c_str());

    return result;
}
|
|
/**
 * Enumerates the DirectShow devices registered under
 * CLSID_AudioInputDeviceCategory.
 *
 * For each device the "Description" property is read, falling back to
 * "FriendlyName"; devices for which neither can be read are skipped.
 *
 * NOTE(review): the method is named EnumSpeakers but queries the audio *input*
 * category - confirm this is what callers expect.
 *
 * @return One MICInfo per device with a readable name; empty when the device
 *         enumerator cannot be created or the category has no devices.
 */
vector<CaptureAudioFfmpeg::MICInfo> CaptureAudioFfmpeg::EnumSpeakers()
{
    vector<CaptureAudioFfmpeg::MICInfo> devices;

    // Create the System Device Enumerator.
    ICreateDevEnum *pDevEnum = nullptr;
    HRESULT hr = CoCreateInstance(CLSID_SystemDeviceEnum, nullptr,
                                  CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&pDevEnum));
    if (FAILED(hr))
        return devices;

    // Create an enumerator for the category.
    IEnumMoniker *pEnum = nullptr;
    hr = pDevEnum->CreateClassEnumerator(CLSID_AudioInputDeviceCategory, &pEnum, 0);
    pDevEnum->Release();

    // S_FALSE means the category is empty; treat it like a failure.
    if (FAILED(hr) || hr == S_FALSE || pEnum == nullptr)
        return devices;

    IMoniker *pMoniker = nullptr;
    while (pEnum->Next(1, &pMoniker, nullptr) == S_OK)
    {
        IPropertyBag *pPropBag = nullptr;
        hr = pMoniker->BindToStorage(0, 0, IID_PPV_ARGS(&pPropBag));
        if (SUCCEEDED(hr))
        {
            VARIANT var;
            VariantInit(&var);

            // Get description or friendly name.
            hr = pPropBag->Read(L"Description", &var, 0);
            if (FAILED(hr))
                hr = pPropBag->Read(L"FriendlyName", &var, 0);

            // Only touch bstrVal when the variant really holds a string.
            if (SUCCEEDED(hr) && var.vt == VT_BSTR && var.bstrVal != nullptr)
            {
                CaptureAudioFfmpeg::MICInfo ele;
                ele.name = var.bstrVal;
                devices.push_back(ele);
            }

            VariantClear(&var);
            pPropBag->Release();
        }
        pMoniker->Release();
    }

    pEnum->Release();
    return devices;
}
|
|
|
|
|
|
// Records the requested sample rate and channel count in the object; nothing
// else is initialised here.
CaptureAudioFfmpeg::CaptureAudioFfmpeg(uint16_t rate, uint8_t channel)
{
    this->mChanel = channel;
    this->mSampleRate = rate;
}
|
|
|
|
|
|
static char *dup_wchar_to_utf8(wchar_t *w)
|
|
{
|
|
char *s = NULL;
|
|
int l = WideCharToMultiByte(CP_UTF8, 0, w, -1, 0, 0, 0, 0);
|
|
s = (char *)av_malloc(l);
|
|
if (s)
|
|
WideCharToMultiByte(CP_UTF8, 0, w, -1, s, l, 0, 0);
|
|
return s;
|
|
}
|
|
|
|
/**
 * Opens a DirectShow audio device, encodes what it delivers to AAC and writes
 * the result to the file "aac.aac".
 *
 * Despite its name this call runs the whole capture: it returns only after
 * reading from the device has failed/ended and the output has been finalised.
 *
 * @param url      Device string for FFmpeg's "dshow" input, as a wide string.
 * @param rate     Currently unused - the encoder is fixed at 48000 Hz.
 * @param channel  Currently unused - the encoder is fixed at 2 channels.
 * @return 0 when the capture loop has finished, -1 when any setup step failed.
 *
 * NOTE(review): decoding and filtering always use input stream 0 (as do
 * DecodeAudio and initAudioFilters) while packets are selected by the index
 * returned from av_find_best_stream; these agree only when the audio stream
 * is stream 0.
 */
int CaptureAudioFfmpeg::InitCapture(wstring url, uint16_t rate, uint8_t channel)
{
    (void)rate;
    (void)channel;

    // dup_wchar_to_utf8 returns an av_malloc'ed buffer (or NULL). Copy it into
    // a std::string and release it at once so it cannot leak.
    char *utf8Url = dup_wchar_to_utf8(const_cast<wchar_t *>(url.c_str()));
    if (!utf8Url) {
        printf("failed input file\n");
        return -1;
    }
    const string fileAudioInput = utf8Url;
    av_free(utf8Url);

    AVCodecContext *codec_ctx = nullptr;
    AVFormatContext *outfmt_ctx = nullptr;
    AVFrame *pSrcAudioFrame = nullptr;

    // Releases everything this call owns; every step tolerates NULL.
    auto releaseAll = [&]() {
        av_frame_free(&pSrcAudioFrame);
        if (outfmt_ctx) {
            avio_close(outfmt_ctx->pb);
            outfmt_ctx->pb = nullptr;
            // An output context is released with avformat_free_context;
            // avformat_close_input is only for demuxer contexts.
            avformat_free_context(outfmt_ctx);
            outfmt_ctx = nullptr;
        }
        avcodec_free_context(&codec_ctx);
        avformat_close_input(&mInfmt_ctx);
    };
    auto fail = [&](const char *message) -> int {
        printf("%s", message);
        releaseAll();
        return -1;
    };

    // ---- input device ----
    AVInputFormat *imft = av_find_input_format("dshow");
    AVDictionary *format_opts = nullptr;
    av_dict_set_int(&format_opts, "audio_buffer_size", 20, 0);
    const int openRet = avformat_open_input(&mInfmt_ctx, fileAudioInput.c_str(), imft, &format_opts);
    av_dict_free(&format_opts); // on return it only holds options that were not consumed
    if (openRet < 0)
        return fail("failed input file\n");

    if (avformat_find_stream_info(mInfmt_ctx, NULL) < 0)
        return fail("failed find stream info\n");

    // av_find_best_stream reports failure with a negative AVERROR code, not
    // necessarily -1.
    const int audio_index = av_find_best_stream(mInfmt_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);
    if (audio_index < 0)
        return fail("failed find best stream\n");

    // ---- decoder ----
    // Looked up on every call: a function-local static would keep the decoder
    // of the first device ever opened.
    AVCodec *decodec = avcodec_find_decoder(mInfmt_ctx->streams[0]->codec->codec_id);
    if (!decodec)
        return fail("failed find decoder\n");
    if (avcodec_open2(mInfmt_ctx->streams[0]->codec, decodec, NULL) < 0)
        return fail("failed open decoder\n");

    // ---- filter graph (sample format conversion / fixed frame size) ----
    if (initAudioFilters() < 0)
        return fail("failed init audio filters\n");

    // ---- encoder ----
    AVCodec *codec = avcodec_find_encoder(AV_CODEC_ID_AAC);
    if (!codec)
        return fail("failed find coder\n");
    codec_ctx = avcodec_alloc_context3(codec);
    if (!codec_ctx)
        return fail("failed alloc coder context\n");

    codec_ctx->codec = codec;
    codec_ctx->sample_rate = 48000;
    codec_ctx->channel_layout = 3; // bit mask front-left | front-right, i.e. stereo
    codec_ctx->channels = 2;
    codec_ctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
    codec_ctx->codec_tag = 0;
    codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;

    if (avcodec_open2(codec_ctx, codec, NULL) < 0)
        return fail("failed open coder\n");

    // ---- output file ----
    if (avformat_alloc_output_context2(&outfmt_ctx, NULL, NULL, "aac.aac") < 0)
        return fail("failed alloc outputcontext\n");

    AVStream *out_stream = avformat_new_stream(outfmt_ctx, codec_ctx->codec);
    if (!out_stream)
        return fail("failed new stream\n");

    if (avcodec_copy_context(out_stream->codec, codec_ctx) < 0)
        return fail("failed copy coder context\n");

    if (avio_open(&outfmt_ctx->pb, "aac.aac", AVIO_FLAG_WRITE) < 0)
        return fail("failed to open outfile\n");

    if (avformat_write_header(outfmt_ctx, NULL) < 0)
        return fail("failed write header\n");

    // ---- capture loop ----
    int loop = 1;
    int delayedFrame = 0;
    int got_frame = 0;

    AVPacket packet;
    av_init_packet(&packet);
    packet.data = NULL;
    packet.size = 0;

    AVPacket pkt;
    av_init_packet(&pkt);
    pkt.data = NULL;
    pkt.size = 0;

    pSrcAudioFrame = av_frame_alloc();
    if (!pSrcAudioFrame)
        return fail("failed alloc frame\n");

    // Stamps the encoded packet in `pkt`, hands it to the muxer and releases it.
    auto stampAndWrite = [&]() {
        auto streamTimeBase = outfmt_ctx->streams[pkt.stream_index]->time_base.den;
        auto codecTimeBase = outfmt_ctx->streams[pkt.stream_index]->codec->time_base.den;
        pkt.pts = pkt.dts = (1024 * streamTimeBase * mAudioCount) / codecTimeBase;
        mAudioCount++;

        auto inputStream = mInfmt_ctx->streams[pkt.stream_index];
        auto outputStream = outfmt_ctx->streams[pkt.stream_index];
        av_packet_rescale_ts(&pkt, inputStream->time_base, outputStream->time_base);

        av_interleaved_write_frame(outfmt_ctx, &pkt);
        av_packet_unref(&pkt);
    };

    // Stop as soon as the device can no longer be read; ignoring the result
    // would spin forever on a stale packet and never reach the cleanup below.
    while (av_read_frame(mInfmt_ctx, &packet) >= 0) {
        loop++;
        if (packet.stream_index == audio_index) {
            AVFrame *filterFrame = DecodeAudio(&packet, pSrcAudioFrame);
            if (filterFrame) {
                got_frame = 0;
                avcodec_encode_audio2(codec_ctx, &pkt, filterFrame, &got_frame);
                // DecodeAudio allocates a new frame per call; we own it.
                av_frame_free(&filterFrame);

                if (got_frame) {
                    stampAndWrite();
                    printf("output frame %3d\n", loop - delayedFrame);
                }
                else {
                    delayedFrame++;
                    av_packet_unref(&pkt);
                    printf("no output frame\n");
                }
            }
        }
        av_packet_unref(&packet);
    }

    // Drain the packets still buffered inside the encoder. This is done on
    // codec_ctx, the context that actually encoded; the output stream's own
    // codec context only received a copy of the settings and is never opened.
    for (;;) {
        got_frame = 0;
        if (avcodec_encode_audio2(codec_ctx, &pkt, NULL, &got_frame) < 0 || !got_frame) {
            av_packet_unref(&pkt);
            break;
        }
        stampAndWrite();
    }

    av_write_trailer(outfmt_ctx);
    releaseAll();
    return 0;
}
|
|
|
|
int CaptureAudioFfmpeg::initAudioFilters()
|
|
{
|
|
char args[512];
|
|
int ret;
|
|
AVFilter *abuffersrc = (AVFilter *)avfilter_get_by_name("abuffer");
|
|
AVFilter *abuffersink = (AVFilter *)avfilter_get_by_name("abuffersink");
|
|
AVFilterInOut *outputs = avfilter_inout_alloc();
|
|
AVFilterInOut *inputs = avfilter_inout_alloc();
|
|
|
|
auto audioDecoderContext = mInfmt_ctx->streams[0]->codec;
|
|
if (!audioDecoderContext->channel_layout)
|
|
audioDecoderContext->channel_layout = av_get_default_channel_layout(audioDecoderContext->channels);
|
|
|
|
static const enum AVSampleFormat out_sample_fmts[] = { AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE };
|
|
static const uint64_t out_channel_layouts[] = { audioDecoderContext->channel_layout};
|
|
static const int out_sample_rates[] = { audioDecoderContext->sample_rate , -1 };
|
|
|
|
AVRational time_base = mInfmt_ctx->streams[0]->time_base;
|
|
mFilterGraph = avfilter_graph_alloc();
|
|
mFilterGraph->nb_threads = 1;
|
|
|
|
sprintf_s(args, sizeof(args),
|
|
"time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%I64x",
|
|
time_base.num, time_base.den, audioDecoderContext->sample_rate,
|
|
av_get_sample_fmt_name(audioDecoderContext->sample_fmt), audioDecoderContext->channel_layout);
|
|
|
|
ret = avfilter_graph_create_filter(&mBuffersrcCtx, abuffersrc, "in",
|
|
args, NULL, mFilterGraph);
|
|
if (ret < 0) {
|
|
av_log(NULL, AV_LOG_ERROR, "Cannot create audio buffer source\n");
|
|
return ret;
|
|
}
|
|
|
|
/* buffer audio sink: to terminate the filter chain. */
|
|
ret = avfilter_graph_create_filter(&mBuffersinkCtx, abuffersink, "out",
|
|
NULL, NULL, mFilterGraph);
|
|
if (ret < 0) {
|
|
av_log(NULL, AV_LOG_ERROR, "Cannot create audio buffer sink\n");
|
|
return ret;
|
|
}
|
|
|
|
ret = av_opt_set_int_list(mBuffersinkCtx, "sample_fmts", out_sample_fmts, -1,
|
|
AV_OPT_SEARCH_CHILDREN);
|
|
if (ret < 0) {
|
|
av_log(NULL, AV_LOG_ERROR, "Cannot set output sample format\n");
|
|
return ret;
|
|
}
|
|
|
|
ret = av_opt_set_int_list(mBuffersinkCtx, "channel_layouts", out_channel_layouts, -1,
|
|
AV_OPT_SEARCH_CHILDREN);
|
|
if (ret < 0) {
|
|
av_log(NULL, AV_LOG_ERROR, "Cannot set output channel layout\n");
|
|
return ret;
|
|
}
|
|
|
|
ret = av_opt_set_int_list(mBuffersinkCtx, "sample_rates", out_sample_rates, -1,
|
|
AV_OPT_SEARCH_CHILDREN);
|
|
if (ret < 0) {
|
|
av_log(NULL, AV_LOG_ERROR, "Cannot set output sample rate\n");
|
|
return ret;
|
|
}
|
|
|
|
/* Endpoints for the filter graph. */
|
|
outputs->name = av_strdup("in");
|
|
outputs->filter_ctx = mBuffersrcCtx;;
|
|
outputs->pad_idx = 0;
|
|
outputs->next = NULL;
|
|
|
|
inputs->name = av_strdup("out");
|
|
inputs->filter_ctx = mBuffersinkCtx;
|
|
inputs->pad_idx = 0;
|
|
inputs->next = NULL;
|
|
|
|
if ((ret = avfilter_graph_parse_ptr(mFilterGraph, "anull",
|
|
&inputs, &outputs, nullptr)) < 0)
|
|
return ret;
|
|
|
|
if ((ret = avfilter_graph_config(mFilterGraph, NULL)) < 0)
|
|
return ret;
|
|
|
|
av_buffersink_set_frame_size(mBuffersinkCtx, 1024);
|
|
return 0;
|
|
}
|
|
|
|
/**
 * Drains the packets still buffered in the encoder attached to one stream of
 * an output context and writes them with av_write_frame.
 *
 * @param fmt_ctx       Output format context whose stream is flushed.
 * @param stream_index  Index of the stream inside fmt_ctx.
 * @return 0 when there was nothing (more) to flush, -1 for an invalid
 *         context/stream, otherwise the negative error from encoding or
 *         writing.
 */
int CaptureAudioFfmpeg::flush_encoder(AVFormatContext *fmt_ctx, unsigned int stream_index)
{
    if (!fmt_ctx || stream_index >= fmt_ctx->nb_streams)
        return -1;

    AVCodecContext *encoder = fmt_ctx->streams[stream_index]->codec;
    if (!encoder || !encoder->codec)
        return -1;

    // 0x0020 is the "delay" capability bit: without it the encoder buffers
    // nothing and there is nothing to drain.
    if (!(encoder->codec->capabilities & 0x0020))
        return 0;

    int ret = 0;
    for (;;) {
        AVPacket enc_pkt;
        enc_pkt.data = NULL;
        enc_pkt.size = 0;
        av_init_packet(&enc_pkt);

        // A NULL frame asks the encoder to emit what it is still holding.
        int got_frame = 0;
        ret = avcodec_encode_audio2(encoder, &enc_pkt, NULL, &got_frame);
        if (ret < 0)
            break;
        if (!got_frame) {
            ret = 0;
            break;
        }
        printf("Flush Encoder: Succeed to encode 1 frame!\tsize:%5d\n", enc_pkt.size);

        /* mux encoded frame */
        ret = av_write_frame(fmt_ctx, &enc_pkt);
        // av_write_frame does not take ownership, so release the payload here.
        av_packet_unref(&enc_pkt);
        if (ret < 0)
            break;
    }
    return ret;
}
|
|
|
|
/**
 * Decodes one packet with the decoder of input stream 0, pushes the decoded
 * frame into the filter graph and pulls one filtered frame back out.
 *
 * @param packet          Packet read from the input.
 * @param pSrcAudioFrame  Scratch frame that receives the decoded samples.
 * @return A newly allocated filtered frame that the caller must release with
 *         av_frame_free, or nullptr when decoding produced no frame, a
 *         filter call failed, or the sink has no frame available.
 */
AVFrame *CaptureAudioFfmpeg::DecodeAudio(AVPacket *packet, AVFrame *pSrcAudioFrame)
{
    AVCodecContext *codecContext = mInfmt_ctx->streams[0]->codec;

    int gotFrame = 0;
    const int length = avcodec_decode_audio4(codecContext, pSrcAudioFrame, &gotFrame, packet);
    if (length < 0 || gotFrame == 0)
        return nullptr;

    if (av_buffersrc_add_frame_flags(mBuffersrcCtx, pSrcAudioFrame, AV_BUFFERSRC_FLAG_PUSH) < 0) {
        av_log(NULL, AV_LOG_ERROR, "buffe src add frame error!\n");
        return nullptr;
    }

    AVFrame *filtFrame = av_frame_alloc();
    if (!filtFrame)
        return nullptr;

    // NO_REQUEST: only take a frame that is already queued in the sink.
    if (av_buffersink_get_frame_flags(mBuffersinkCtx, filtFrame, AV_BUFFERSINK_FLAG_NO_REQUEST) < 0) {
        av_frame_free(&filtFrame);
        return nullptr;
    }
    return filtFrame;
}
|