在.NET中使用Speex -- 音频数据编解码

时间：2014-08-21 17:10:04 阅读：696 评论：0 收藏：0 [点我收藏+]

Speex是一套开源的音频编解码库，最新版本还包含了回音消除和防抖动等功能，如果我们想开发语音聊天或视频会议这样的系统，Speex将是一个不错的选择。到 http://www.speex.org 可以下载Speex的源码（编译后的dll为libspeex.dll），最新版本为1.2。不过源码是用C语言开发的，导出的是原生API，直接在.NET中使用会有诸多不便，为此，我用C#将其封装，使得编解码的调用相当简单。

　　由于Speex原始导出的API不是很方便C#调用，所以，在用C#封装之前，先要用C++对Speex的原始API进行简化，新建一个名为Speex的VC项目，然后引用libspeex.dll的相关库文件，添加cpp文件后，复制下列源码到文件中：

#include "speex\speex.h"

#include <windows.h>

#include <stdio.h>

#include <stdlib.h>

#include "speex/speex_echo.h"

#include "speex/speex_preprocess.h" 

#include "Speex.h"

// Number of samples processed per encode/decode call; also used as the
// frame size for the echo canceller and preprocessor.
#define FRAME_SIZE 160

// Float staging buffer that encoder_encode fills from the caller's 16-bit samples.
float encoder_input[FRAME_SIZE];

// Encoder handle: assigned by encoder_init, destroyed by encoder_dispose.
void *encoder_state;

// Bit buffer the encoder writes into; reset before every frame in encoder_encode.
SpeexBits encoder_bits;

BOOL APIENTRY DllMain( HANDLE hModule, 

                       DWORD  ul_reason_for_call, 

                       LPVOID lpReserved

                     )

{

    return TRUE;

}
 
extern "C" __declspec(dllexport) void encoder_init(int quality)

{

    encoder_state = speex_encoder_init(&speex_nb_mode);

    speex_encoder_ctl(encoder_state, SPEEX_SET_QUALITY, &quality);

    speex_bits_init(&encoder_bits);

}

extern "C" __declspec(dllexport) void encoder_dispose()

{

    speex_encoder_destroy(encoder_state);

    speex_bits_destroy(&encoder_bits);

}

extern "C" __declspec(dllexport) int encoder_encode(const short *data, char *output)

{

    for (int i = 0; i < FRAME_SIZE; i++)

        encoder_input[i] = data[i];

    speex_bits_reset(&encoder_bits);

    speex_encode(encoder_state, encoder_input, &encoder_bits);

    return speex_bits_write(&encoder_bits, output, 200);

}

// Float buffer that speex_decode fills; decoder_decode copies it out as 16-bit samples.
float decoder_output[FRAME_SIZE];

// Decoder handle: assigned by decoder_init, destroyed by decoder_dispose.
void *decoder_state;

// Bit buffer loaded with the encoded bytes of each frame in decoder_decode.
SpeexBits decoder_bits;

extern "C" __declspec(dllexport) void decoder_init()

{

    decoder_state = speex_decoder_init(&speex_nb_mode);

    int tmp = 1;

    speex_decoder_ctl(decoder_state, SPEEX_SET_ENH, &tmp);

    speex_bits_init(&decoder_bits);

}

extern "C" __declspec(dllexport) void decoder_dispose()

{

    speex_decoder_destroy(decoder_state);

    speex_bits_destroy(&decoder_bits);

}

extern "C" __declspec(dllexport) void decoder_decode(int nbBytes, char *data, short *output)

{

    speex_bits_read_from(&decoder_bits, data, nbBytes);

    speex_decode(decoder_state, &decoder_bits, decoder_output);

    for (int i = 0; i < FRAME_SIZE; i++)

    {

        output[i] = decoder_output[i];

    }

}

/***************************************************  Echo cancellation **************************************/

// True between a successful SpeexEchoInit and the next SpeexEchoReset.
bool      m_bSpeexEchoHasInit;

// Echo canceller handle; created in SpeexEchoInit, destroyed in SpeexEchoReset.
SpeexEchoState*   m_SpeexEchoState;

// Preprocessor handle; created in SpeexEchoInit, destroyed in SpeexEchoReset.
SpeexPreprocessState* m_pPreprocessorState;

// Filter length passed to speex_echo_state_init (defaults to 160*8).
int      m_nFilterLen;

// Sampling rate passed to speex_preprocess_state_init (defaults to 8000).
int      m_nSampleRate;

// Buffer of FRAME_SIZE+1 floats allocated in SpeexEchoInit and freed in
// SpeexEchoReset; it is not read or written anywhere else in this file.
float*   m_pfNoise;

extern "C" __declspec(dllexport) void SpeexEchoCapture(short* input_frame, short* output_frame)

{

    speex_echo_capture(m_SpeexEchoState, input_frame, output_frame);

}

extern "C" __declspec(dllexport) void SpeexEchoPlayback(short* echo_frame)

{

    speex_echo_playback(m_SpeexEchoState, echo_frame);

}

extern "C" __declspec(dllexport) void SpeexEchoReset()

{

    if (m_SpeexEchoState != NULL)

    {

        speex_echo_state_destroy(m_SpeexEchoState);

        m_SpeexEchoState = NULL;

    }

    if (m_pPreprocessorState != NULL)

    {

        speex_preprocess_state_destroy(m_pPreprocessorState);

        m_pPreprocessorState = NULL;

    }

    if (m_pfNoise != NULL)

    {

        delete []m_pfNoise;

        m_pfNoise = NULL;

    }

    m_bSpeexEchoHasInit = false;

}

extern "C" __declspec(dllexport) void SpeexEchoInit(int filter_length, int sampling_rate ,bool associatePreprocesser)

{

    SpeexEchoReset(); 

    if (filter_length<=0 || sampling_rate<=0)

    {

      m_nFilterLen  = 160*8;

      m_nSampleRate = 8000;

    }

    else

    {

      m_nFilterLen  = filter_length;

      m_nSampleRate = sampling_rate;

    }

    m_SpeexEchoState = speex_echo_state_init(FRAME_SIZE, m_nFilterLen);

    m_pPreprocessorState = speex_preprocess_state_init(FRAME_SIZE, m_nSampleRate);

    if(associatePreprocesser)

    {

        speex_preprocess_ctl(m_pPreprocessorState, SPEEX_PREPROCESS_SET_ECHO_STATE,m_SpeexEchoState);

    }

    m_pfNoise = new float[FRAME_SIZE+1];

    m_bSpeexEchoHasInit = true;

}

// Runs echo cancellation on one frame via speex_echo_cancellation.
//
// mic: samples captured from the microphone.
// ref: reference samples (what was played back).
// out: receives the echo-cancelled samples.
//
// Nothing is written to `out` unless SpeexEchoInit has been called.
extern "C" __declspec(dllexport) void SpeexEchoDoAEC(short* mic, short* ref, short* out)
{
    if (m_bSpeexEchoHasInit)
    {
        speex_echo_cancellation(m_SpeexEchoState,
                                (const __int16 *) mic,
                                (const __int16 *) ref,
                                (__int16 *) out);
    }
}

　　编译便生成Speex.dll。

　　如果对VC不熟悉也没关系，文末会直接给出libspeex.dll和Speex.dll的下载，直接使用就OK了。

　　现在，C#可以调用Speex.dll导出的简单函数了，最终封装的源码如下：

    /// <summary>

    /// 对Speex的C#封装。

    /// zhuweisky 2010.05.13

    /// </summary>

    public class Speex :IAudioCodec

    {

        private const int FrameSize = 160;

        #region IsDisposed

        private volatile bool isDisposed = false;

        public bool IsDisposed

        {

            get { return isDisposed; }

        } 

        #endregion

        #region Ctor

        /// <summary>

        /// 初始化。

        /// </summary>

        /// <param name="quality">编码质量，取值0~10</param>

        public Speex(int quality)

        {

            if (quality < 0 || quality > 10)

            {

                throw new Exception("quality value must be between 0 and 10.");

            }

            Speex.encoder_init(quality);

            Speex.decoder_init();

        }

        #endregion

        #region Dispose

        public void Dispose()

        {

            this.isDisposed = true;

            System.Threading.Thread.Sleep(100);

            Speex.decoder_dispose();

            Speex.encoder_dispose();

        }

        #endregion

        #region Encode

        /// <summary>

        /// 将采集到的音频数据进行编码。

        /// </summary>       

        public byte[] Encode(byte[] data)

        {

            if (this.isDisposed)

            {

                return null;

            }

            if (data.Length % (FrameSize * 2) != 0)

            {

                throw new ArgumentException("Invalid Data Length.");

            }

            int nbBytes;

            short[] input = new short[FrameSize];

            byte[] buffer = new byte[200];

            byte[] output = new byte[0];

            for (int i = 0; i < data.Length / (FrameSize * 2); i++)

            {

                for (int j = 0; j < input.Length; j++)

                {

                    input[j] = (short)(data[i * FrameSize * 2 + j * 2] + data[i * FrameSize * 2 + j * 2 + 1] * 0x100);

                }

                nbBytes = Speex.encoder_encode(input, buffer);

                Array.Resize<byte>(ref output, output.Length + nbBytes + sizeof(int));

                Array.Copy(buffer, 0, output, output.Length - nbBytes, nbBytes);

                for (int j = 0; j < sizeof(int); j++)

                {

                    output[output.Length - nbBytes - sizeof(int) + j] = (byte)(nbBytes % 0x100);

                    nbBytes /= 0x100;

                }

            }

            return output;

        }

        #endregion

        #region Decode

        /// <summary>

        /// 将编码后的数据进行解码得到原始的音频数据。

        /// </summary>      

        public byte[] Decode(byte[] data)

        {

            if (this.isDisposed)

            {

                return null;

            }

            int nbBytes, index = 0;

            byte[] input;

            short[] buffer = new short[FrameSize];

            byte[] output = new byte[0];

            while (index < data.Length)

            {

                nbBytes = 0;

                index += sizeof(int);

                for (int i = 1; i <= sizeof(int); i++)

                    nbBytes = nbBytes * 0x100 + data[index - i];

                input = new byte[nbBytes];

                Array.Copy(data, index, input, 0, input.Length);

                index += input.Length;

                Speex.decoder_decode(nbBytes, input, buffer);

                Array.Resize<byte>(ref output, output.Length + FrameSize * 2);

                for (int i = 0; i < FrameSize; i++)

                {

                    output[output.Length - FrameSize * 2 + i * 2] = (byte)(buffer[i] % 0x100);

                    output[output.Length - FrameSize * 2 + i * 2 + 1] = (byte)(buffer[i] / 0x100);

                }

            }

            return output;

        }

        #endregion

        #region Pinvoke

        [DllImport("Speex.dll", EntryPoint = "encoder_init")]

        internal extern static void encoder_init(int quality);

        [DllImport("Speex.dll", EntryPoint = "encoder_dispose")]

        internal extern static void encoder_dispose();

        [DllImport("Speex.dll", EntryPoint = "encoder_encode")]

        internal extern static int encoder_encode(short[] data, byte[] output);

        [DllImport("Speex.dll", EntryPoint = "decoder_init")]

        internal extern static void decoder_init();

        [DllImport("Speex.dll", EntryPoint = "decoder_dispose")]

        internal extern static void decoder_dispose();

        [DllImport("Speex.dll", EntryPoint = "decoder_decode")]

        internal extern static void decoder_decode(int nbBytes, byte[] data, short[] output);      

        #endregion

    }

该类的主要成员只有四个：构造函数（负责初始化）、Encode、Decode、Dispose。方法参数的含义也非常明显。

　一般音频对话的整个流程是这样的：采集 -> 编码 -> 网络传输 -> 解码 -> 播放。

　而该封装的Speex类解决了这个过程中的音频编码和解码的问题。你可以复制该源码到你的项目，并将上文编译生成的Speex.dll以及它所依赖的libspeex.dll（可由从 http://www.speex.org 下载的源码编译得到）一起放到运行目录下，就可以正常地使用SPEEX的编解码功能了。

　关于Speex更高级的功能，我正在研究中，有兴趣的朋友可以email给我一起探讨。　　

Speex dll 可以到官网下载页面下载。

注：我们的研究成果已经全部集成到了OMCS中，其支持回音消除（AEC）、静音检测（VAD）、噪音抑制（DENOISE）、自动增益（AGC）等网络语音技术，有兴趣的可以了解一下。

在.NET中使用Speex -- 音频数据编解码,布布扣,bubuko.com

在.NET中使用Speex -- 音频数据编解码

原文：http://blog.csdn.net/zhuweisky/article/details/38732481

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年09月23日 (328)
2021年09月24日 (313)
2021年09月17日 (191)
2021年09月15日 (369)
2021年09月16日 (411)
2021年09月13日 (439)
2021年09月11日 (398)
2021年09月12日 (393)
2021年09月10日 (160)
2021年09月08日 (222)