#include "audiocaptureff.h"

#ifdef __MINGW32__
std::string WString2String(const std::wstring& ws)
{
    std::string strLocale = setlocale(LC_ALL, "");
    const wchar_t* wchSrc = ws.c_str();
    size_t nDestSize = wcstombs(NULL, wchSrc, 0) + 1;
    char* chDest = new char[nDestSize];
    memset(chDest, 0, nDestSize);
    wcstombs(chDest, wchSrc, nDestSize);
    std::string strResult = chDest;
    delete[] chDest;
    setlocale(LC_ALL, strLocale.c_str());
    return strResult;
}
#endif

// Enumerate DirectShow audio capture devices (CLSID_AudioInputDeviceCategory).
// Despite the name, this lists audio *input* endpoints usable with FFmpeg's dshow demuxer.
vector<CaptureAudioFfmpeg::MICInfo> CaptureAudioFfmpeg::EnumSpeakers()
{
    vector<CaptureAudioFfmpeg::MICInfo> ret;
    IEnumMoniker *pEnum = nullptr;

    // Create the System Device Enumerator.
    ICreateDevEnum *pDevEnum;
    HRESULT hr = CoCreateInstance(CLSID_SystemDeviceEnum, nullptr,
        CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&pDevEnum));

    if (SUCCEEDED(hr))
    {
        // Create an enumerator for the category.
        hr = pDevEnum->CreateClassEnumerator(CLSID_AudioInputDeviceCategory, &pEnum, 0);
        if (hr == S_FALSE)
        {
            hr = VFW_E_NOT_FOUND; // The category is empty. Treat as an error.
        }
        pDevEnum->Release();
    }

    if (!SUCCEEDED(hr))
        return ret;

    IMoniker *pMoniker = nullptr;
    while (pEnum->Next(1, &pMoniker, nullptr) == S_OK)
    {
        IPropertyBag *pPropBag;
        VARIANT var;
        VariantInit(&var);

        HRESULT hr = pMoniker->BindToStorage(0, 0, IID_PPV_ARGS(&pPropBag));
        if (FAILED(hr))
        {
            pMoniker->Release();
            continue;
        }

        // Get description or friendly name.
        hr = pPropBag->Read(L"Description", &var, 0);
        if (FAILED(hr))
        {
            hr = pPropBag->Read(L"FriendlyName", &var, 0);
        }
        if (SUCCEEDED(hr))
        {
            CaptureAudioFfmpeg::MICInfo ele;
            ele.name = var.bstrVal;
            ret.push_back(ele);
            VariantClear(&var);
        }

        pPropBag->Release();
        pMoniker->Release();
    }

    pEnum->Release();

    return ret;
}
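
// Hypothetical usage sketch (not part of the original file): pick the first
// enumerated capture device and feed it to InitCapture() as an FFmpeg dshow
// input specifier. Assumes MICInfo::name is a std::wstring and that the dshow
// demuxer's "audio=<device name>" syntax is wanted; adjust to the real
// declarations in audiocaptureff.h.
#if 0
int main()
{
    CoInitializeEx(nullptr, COINIT_MULTITHREADED);

    CaptureAudioFfmpeg capture(48000, 2);
    auto devices = capture.EnumSpeakers();
    if (devices.empty())
        return -1;

    std::wstring url = L"audio=" + devices[0].name;   // dshow input specifier
    int ret = capture.InitCapture(url, 48000, 2);

    CoUninitialize();
    return ret;
}
#endif
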
CaptureAudioFfmpeg::CaptureAudioFfmpeg(uint16_t rate, uint8_t channel)
{
    mSampleRate = rate;
    mChanel = channel;
}

// Convert a wide string to UTF-8. The buffer is allocated with av_malloc()
// and must be released by the caller with av_free().
static char *dup_wchar_to_utf8(wchar_t *w)
{
    char *s = NULL;
    int l = WideCharToMultiByte(CP_UTF8, 0, w, -1, 0, 0, 0, 0);
    s = (char *)av_malloc(l);
    if (s)
        WideCharToMultiByte(CP_UTF8, 0, w, -1, s, l, 0, 0);
    return s;
}

int CaptureAudioFfmpeg::InitCapture(wstring url, uint16_t rate, uint8_t channel)
{
    // The dshow demuxer expects a UTF-8 device string such as "audio=<device name>".
    char* utf8Url = dup_wchar_to_utf8((wchar_t *)url.c_str());
    string fileAudioInput = utf8Url ? utf8Url : "";
    av_free(utf8Url);

    AVInputFormat* imft = av_find_input_format("dshow");
    AVDictionary *format_opts = nullptr;
    av_dict_set_int(&format_opts, "audio_buffer_size", 20, 0); // dshow audio buffer, in ms
    if (0 > avformat_open_input(&mInfmt_ctx, fileAudioInput.c_str(), imft, &format_opts)) {
        printf("failed to open input\n");
        return -1;
    }
    if (0 > avformat_find_stream_info(mInfmt_ctx, NULL)) {
        printf("failed to find stream info\n");
        avformat_close_input(&mInfmt_ctx);
        return -1;
    }
    int audio_index = av_find_best_stream(mInfmt_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);
    if (audio_index < 0) {
        printf("failed to find an audio stream\n");
        avformat_close_input(&mInfmt_ctx);
        return -1;
    }
    //av_dump_format(mInfmt_ctx, 0, fileAudioInput.c_str(), 1);
    // END: open input

    // Open the decoder (the code below assumes the audio stream is stream 0).
    static AVCodec* decodec = avcodec_find_decoder(mInfmt_ctx->streams[0]->codec->codec_id);
    if (!decodec) {
        printf("failed to find decoder\n");
        return -1;
    }
    if (0 > avcodec_open2(mInfmt_ctx->streams[0]->codec, decodec, NULL)) {
        printf("failed to open decoder\n");
        return -1;
    }
    // END: decoder

    // Initialize the resampling / filter graph.
    initAudioFilters();
    // END: filter initialization

    // Set up the AAC encoder.
    static AVCodec* codec = NULL;
    //codec = avcodec_find_encoder_by_name("libmp3lame");
    codec = avcodec_find_encoder(AV_CODEC_ID_AAC);
    static AVCodecContext* codec_ctx = NULL;
    codec_ctx = avcodec_alloc_context3(codec);
    // codec_ctx->bit_rate = 64000;
    codec_ctx->codec = codec;
    codec_ctx->sample_rate = 48000;
    codec_ctx->channel_layout = AV_CH_LAYOUT_STEREO;
    codec_ctx->channels = 2;
    //codec_ctx->frame_size = 1024;
    codec_ctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
    codec_ctx->codec_tag = 0;
    codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;

    if (0 > avcodec_open2(codec_ctx, codec, NULL)) {
        printf("failed to open encoder\n");
        avformat_close_input(&mInfmt_ctx);
        avcodec_free_context(&codec_ctx);
        return -1;
    }
    // END: encoder

    // Open the output file.
    AVFormatContext* outfmt_ctx = NULL;
    if (0 > avformat_alloc_output_context2(&outfmt_ctx, NULL, NULL, "aac.aac")) {
        printf("failed to allocate output context\n");
        avformat_close_input(&mInfmt_ctx);
        avcodec_free_context(&codec_ctx);
        return -1;
    }
    AVStream* out_stream = avformat_new_stream(outfmt_ctx, codec_ctx->codec);
    if (!out_stream) {
        printf("failed to create output stream\n");
        avformat_close_input(&mInfmt_ctx);
        avcodec_free_context(&codec_ctx);
        avformat_close_input(&outfmt_ctx);
        return -1;
    }
    avcodec_copy_context(out_stream->codec, codec_ctx);
    // if (0 > avio_open(&outfmt_ctx->pb, "rtmp://localhost/testlive", AVIO_FLAG_WRITE)) {
    if (0 > avio_open(&outfmt_ctx->pb, "aac.aac", AVIO_FLAG_WRITE)) {
        printf("failed to open output file\n");
        avformat_close_input(&mInfmt_ctx);
        avcodec_free_context(&codec_ctx);
        avformat_close_input(&outfmt_ctx);
        return -1;
    }
    avformat_write_header(outfmt_ctx, NULL);
    // END: output file

#if 0
    AVFrame* Frame = av_frame_alloc();
    Frame->nb_samples = codec_ctx->frame_size;
    Frame->format = codec_ctx->sample_fmt;
    Frame->channel_layout = codec_ctx->channel_layout;
    int size = av_samples_get_buffer_size(NULL, codec_ctx->channels, codec_ctx->frame_size,
        codec_ctx->sample_fmt, 1);
    uint8_t* frame_buf = (uint8_t *)av_malloc(size);
    avcodec_fill_audio_frame(Frame, codec_ctx->channels, codec_ctx->sample_fmt, (const uint8_t*)frame_buf, size, 1);
    int64_t in_channel_layout = av_get_default_channel_layout(codec_ctx->channels);
    AVPacket pkt;
    av_new_packet(&pkt, size);
    pkt.data = NULL;
    int got_frame = -1;
    int delayedFrame = 0;
    static uint8_t audio_buf[(MAX_AUDIO_FRAME_SIZE * 3) / 2];
    int audioCount = 0;
    const uint8_t *indata[AV_NUM_DATA_POINTERS] = { 0 };
    AVFrame* Frame1 = av_frame_alloc();
#endif
    int loop = 1;
    int delayedFrame = 0;
    AVPacket packet;
    av_init_packet(&packet);
    packet.data = NULL;
    packet.size = 0;
    AVPacket pkt;
    av_init_packet(&pkt);
    pkt.data = NULL;
    pkt.size = 0;

    AVFrame* pSrcAudioFrame = av_frame_alloc();
    int got_frame = 0;

    // Capture loop: read from the device, decode + filter, encode to AAC and mux.
    while (1) {
        if (av_read_frame(mInfmt_ctx, &packet) < 0)
            break;
        loop++;
        if (packet.stream_index == audio_index) {
            auto filterFrame = DecodeAudio(&packet, pSrcAudioFrame);
            if (filterFrame) {
                avcodec_encode_audio2(codec_ctx, &pkt, filterFrame, &got_frame);
                av_frame_free(&filterFrame);
                if (got_frame) {
#if 1
                    auto streamTimeBase = outfmt_ctx->streams[pkt.stream_index]->time_base.den;
                    auto codecTimeBase = outfmt_ctx->streams[pkt.stream_index]->codec->time_base.den;
                    pkt.pts = pkt.dts = (1024 * streamTimeBase * mAudioCount) / codecTimeBase;
                    mAudioCount++;
                    auto inputStream = mInfmt_ctx->streams[pkt.stream_index];
                    auto outputStream = outfmt_ctx->streams[pkt.stream_index];
                    av_packet_rescale_ts(&pkt, inputStream->time_base, outputStream->time_base);
#endif
                    // pkt.stream_index = out_stream->index;
                    av_interleaved_write_frame(outfmt_ctx, &pkt);
                    av_packet_unref(&pkt);
                    printf("output frame %3d\n", loop - delayedFrame);
                }
                else {
                    delayedFrame++;
                    av_packet_unref(&pkt);
                    printf("no output frame\n");
                }
            }
        }
        av_packet_unref(&packet);
    }
    flush_encoder(outfmt_ctx, 0);
    av_write_trailer(outfmt_ctx);
    //av_free(Frame);
    av_frame_free(&pSrcAudioFrame);
    avio_close(outfmt_ctx->pb);
    avformat_close_input(&mInfmt_ctx);
    avformat_free_context(outfmt_ctx);

    return 0;
}
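
// Sketch (not part of the original code): the avcodec_encode_audio2() call in
// the loop above is deprecated. Assuming FFmpeg >= 3.1, the send/receive API
// could be used instead, roughly as below; "encode_and_write" is a hypothetical
// helper name, and timestamp handling is left to the caller as in the original.
#if 0
static int encode_and_write(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx,
                            AVFrame *frame)
{
    // Passing frame == NULL flushes the encoder.
    int ret = avcodec_send_frame(enc_ctx, frame);
    if (ret < 0)
        return ret;

    while (ret >= 0) {
        AVPacket pkt;
        av_init_packet(&pkt);
        pkt.data = NULL;
        pkt.size = 0;

        ret = avcodec_receive_packet(enc_ctx, &pkt);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
            return 0;            // need more input, or fully flushed
        if (ret < 0)
            return ret;          // real encoding error

        ret = av_interleaved_write_frame(ofmt_ctx, &pkt);
        av_packet_unref(&pkt);
    }
    return ret;
}
#endif
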
// Build a minimal audio filter graph (abuffer -> anull -> abuffersink) that is
// fed with decoded frames and hands back frames in a format the AAC encoder
// accepts (FLTP, fixed 1024-sample frames).
int CaptureAudioFfmpeg::initAudioFilters()
{
    char args[512];
    int ret;
    AVFilter *abuffersrc = (AVFilter *)avfilter_get_by_name("abuffer");
    AVFilter *abuffersink = (AVFilter *)avfilter_get_by_name("abuffersink");
    AVFilterInOut *outputs = avfilter_inout_alloc();
    AVFilterInOut *inputs = avfilter_inout_alloc();

    auto audioDecoderContext = mInfmt_ctx->streams[0]->codec;
    if (!audioDecoderContext->channel_layout)
        audioDecoderContext->channel_layout = av_get_default_channel_layout(audioDecoderContext->channels);

    // Output constraints for the sink; each list must be terminated with -1 /
    // AV_SAMPLE_FMT_NONE as required by av_opt_set_int_list().
    const enum AVSampleFormat out_sample_fmts[] = { AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE };
    const int64_t out_channel_layouts[] = { (int64_t)audioDecoderContext->channel_layout, -1 };
    const int out_sample_rates[] = { audioDecoderContext->sample_rate, -1 };

    AVRational time_base = mInfmt_ctx->streams[0]->time_base;
    mFilterGraph = avfilter_graph_alloc();
    mFilterGraph->nb_threads = 1;

    sprintf_s(args, sizeof(args),
        "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%I64x",
        time_base.num, time_base.den, audioDecoderContext->sample_rate,
        av_get_sample_fmt_name(audioDecoderContext->sample_fmt),
        audioDecoderContext->channel_layout);

    ret = avfilter_graph_create_filter(&mBuffersrcCtx, abuffersrc, "in",
        args, NULL, mFilterGraph);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot create audio buffer source\n");
        return ret;
    }

    /* buffer audio sink: to terminate the filter chain. */
    ret = avfilter_graph_create_filter(&mBuffersinkCtx, abuffersink, "out",
        NULL, NULL, mFilterGraph);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot create audio buffer sink\n");
        return ret;
    }

    ret = av_opt_set_int_list(mBuffersinkCtx, "sample_fmts", out_sample_fmts, -1,
        AV_OPT_SEARCH_CHILDREN);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot set output sample format\n");
        return ret;
    }

    ret = av_opt_set_int_list(mBuffersinkCtx, "channel_layouts", out_channel_layouts, -1,
        AV_OPT_SEARCH_CHILDREN);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot set output channel layout\n");
        return ret;
    }

    ret = av_opt_set_int_list(mBuffersinkCtx, "sample_rates", out_sample_rates, -1,
        AV_OPT_SEARCH_CHILDREN);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot set output sample rate\n");
        return ret;
    }

    /* Endpoints for the filter graph. */
    outputs->name = av_strdup("in");
    outputs->filter_ctx = mBuffersrcCtx;
    outputs->pad_idx = 0;
    outputs->next = NULL;

    inputs->name = av_strdup("out");
    inputs->filter_ctx = mBuffersinkCtx;
    inputs->pad_idx = 0;
    inputs->next = NULL;

    if ((ret = avfilter_graph_parse_ptr(mFilterGraph, "anull",
        &inputs, &outputs, nullptr)) < 0)
        return ret;

    if ((ret = avfilter_graph_config(mFilterGraph, NULL)) < 0)
        return ret;

    avfilter_inout_free(&inputs);
    avfilter_inout_free(&outputs);

    // Deliver fixed 1024-sample frames to match the AAC encoder's frame size.
    av_buffersink_set_frame_size(mBuffersinkCtx, 1024);
    return 0;
}

// Drain any packets still buffered in the encoder (codecs with
// AV_CODEC_CAP_DELAY, such as AAC) and write them to the output.
int CaptureAudioFfmpeg::flush_encoder(AVFormatContext *fmt_ctx, unsigned int stream_index)
{
    int ret;
    int got_frame;
    AVPacket enc_pkt;
    if (!(fmt_ctx->streams[stream_index]->codec->codec->capabilities &
        AV_CODEC_CAP_DELAY))
        return 0;
    while (1) {
        av_init_packet(&enc_pkt);
        enc_pkt.data = NULL;
        enc_pkt.size = 0;
        ret = avcodec_encode_audio2(fmt_ctx->streams[stream_index]->codec, &enc_pkt,
            NULL, &got_frame);
        if (ret < 0)
            break;
        if (!got_frame) {
            ret = 0;
            break;
        }
        printf("flush encoder: encoded 1 frame, size: %5d\n", enc_pkt.size);
        /* mux encoded frame */
        ret = av_write_frame(fmt_ctx, &enc_pkt);
        if (ret < 0)
            break;
    }
    return ret;
}

// Decode one packet into pSrcAudioFrame, push it through the filter graph and
// return a newly allocated filtered frame. The caller owns the returned frame
// (free it with av_frame_free()); nullptr means no frame is available yet.
AVFrame *CaptureAudioFfmpeg::DecodeAudio(AVPacket *packet, AVFrame *pSrcAudioFrame)
{
    AVStream *stream = mInfmt_ctx->streams[0];
    AVCodecContext *codecContext = stream->codec;
    int gotFrame;
    AVFrame *filtFrame = nullptr;
    auto length = avcodec_decode_audio4(codecContext, pSrcAudioFrame, &gotFrame, packet);
    if (length >= 0 && gotFrame != 0)
    {
        if (av_buffersrc_add_frame_flags(mBuffersrcCtx, pSrcAudioFrame, AV_BUFFERSRC_FLAG_PUSH) < 0) {
            av_log(NULL, AV_LOG_ERROR, "buffersrc add frame error!\n");
            return nullptr;
        }

        filtFrame = av_frame_alloc();
        int ret = av_buffersink_get_frame_flags(mBuffersinkCtx, filtFrame, AV_BUFFERSINK_FLAG_NO_REQUEST);
        if (ret < 0)
        {
            av_frame_free(&filtFrame);
            goto error;
        }
        return filtFrame;
    }
error:
    return nullptr;
}
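
// Sketch (not part of the original code): avcodec_decode_audio4() above is
// deprecated. Assuming FFmpeg >= 3.1, DecodeAudio() could be rewritten around
// the send/receive API, roughly as below; the control flow is simplified and
// the filter-graph push/pull is kept as in the original.
#if 0
AVFrame *CaptureAudioFfmpeg::DecodeAudio(AVPacket *packet, AVFrame *pSrcAudioFrame)
{
    AVCodecContext *codecContext = mInfmt_ctx->streams[0]->codec;

    if (avcodec_send_packet(codecContext, packet) < 0)
        return nullptr;
    if (avcodec_receive_frame(codecContext, pSrcAudioFrame) < 0)
        return nullptr;                       // EAGAIN/EOF or error: no frame yet

    if (av_buffersrc_add_frame_flags(mBuffersrcCtx, pSrcAudioFrame,
                                     AV_BUFFERSRC_FLAG_PUSH) < 0)
        return nullptr;

    AVFrame *filtFrame = av_frame_alloc();
    if (av_buffersink_get_frame_flags(mBuffersinkCtx, filtFrame,
                                      AV_BUFFERSINK_FLAG_NO_REQUEST) < 0) {
        av_frame_free(&filtFrame);
        return nullptr;
    }
    return filtFrame;
}
#endif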