Sat Aug 6 00:39:28 2011

Asterisk developer's documentation


codec_speex.c

Go to the documentation of this file.
00001 /*
00002  * Asterisk -- An open source telephony toolkit.
00003  *
00004  * Copyright (C) 1999 - 2005, Digium, Inc.
00005  *
00006  * Mark Spencer <markster@digium.com>
00007  *
00008  *
00009  * See http://www.asterisk.org for more information about
00010  * the Asterisk project. Please do not directly contact
00011  * any of the maintainers of this project for assistance;
00012  * the project provides a web site, mailing lists and IRC
00013  * channels for your use.
00014  *
00015  * This program is free software, distributed under the terms of
00016  * the GNU General Public License Version 2. See the LICENSE file
00017  * at the top of the source tree.
00018  */
00019 
00020 /*! \file
00021  *
00022  * \brief Translate between signed linear and Speex (Open Codec)
00023  *
00024  * http://www.speex.org
00025  * \note This work was motivated by Jeremy McNamara 
00026  * hacked to be configurable by anthm and bkw 9/28/2004
00027  * \ingroup codecs
00028  */
00029 
00030 /*** MODULEINFO
00031    <depend>speex</depend>
00032    <depend>speex_preprocess</depend>
00033    <use>speexdsp</use>
00034  ***/
00035 
00036 #include "asterisk.h"
00037 
00038 ASTERISK_FILE_VERSION(__FILE__, "$Revision: 211528 $")
00039 
00040 #include <fcntl.h>
00041 #include <stdlib.h>
00042 #include <unistd.h>
00043 #include <netinet/in.h>
00044 #include <string.h>
00045 #include <stdio.h>
00046 #include <speex/speex.h>
00047 
00048 /* We require a post 1.1.8 version of Speex to enable preprocessing
00049    and better type handling */   
00050 #ifdef _SPEEX_TYPES_H
00051 #include <speex/speex_preprocess.h>
00052 #endif
00053 
00054 #include "asterisk/lock.h"
00055 #include "asterisk/translate.h"
00056 #include "asterisk/module.h"
00057 #include "asterisk/config.h"
00058 #include "asterisk/options.h"
00059 #include "asterisk/logger.h"
00060 #include "asterisk/channel.h"
00061 #include "asterisk/utils.h"
00062 
00063 /* Sample frame data */
00064 #include "slin_speex_ex.h"
00065 #include "speex_slin_ex.h"
00066 
/* codec variables
 *
 * Module-wide defaults. parse_config() overwrites them from the [speex]
 * section of codecs.conf; the *_new() constructors read them when a
 * translator instance is created.
 */

/* Encoder settings */
static int quality = 3;          /* SPEEX_SET_QUALITY; parse_config() accepts 0-10 */
static int complexity = 2;       /* SPEEX_SET_COMPLEXITY; parse_config() accepts 0-10 */
static int vad = 0;              /* SPEEX_SET_VAD, only applied when neither abr nor vbr is set */
static int vbr = 0;              /* SPEEX_SET_VBR */
static float vbr_quality = 4;    /* SPEEX_SET_VBR_QUALITY; parse_config() accepts 0-10 */
static int abr = 0;              /* SPEEX_SET_ABR target bitrate, 0 disables */
static int dtx = 0;  /* set to 1 to enable silence detection */

/* Decoder settings */
static int enhancement = 0;      /* SPEEX_SET_ENH */

/* Preprocessor settings (only used when built with _SPEEX_TYPES_H) */
static int preproc = 0;
static int pp_vad = 0;
static int pp_agc = 0;
static float pp_agc_level = 8000; /* XXX what is this 8000 ? */
static int pp_denoise = 0;
static int pp_dereverb = 0;
static float pp_dereverb_decay = 0.4;
static float pp_dereverb_level = 0.3;
00085 
/* Two-bit frame type tags.
   NOTE(review): not referenced anywhere in the visible part of this file. */
#define TYPE_HIGH     0x0
#define TYPE_LOW      0x1
#define TYPE_SILENCE  0x2
#define TYPE_MASK     0x3

/* Capacity, in 16-bit samples, of the per-translator work/output buffers */
#define BUFFER_SAMPLES  8000
/* Sample count advertised for the canned Speex sample frame (one 20 ms frame) */
#define SPEEX_SAMPLES   160
00093 
00094 struct speex_coder_pvt {
00095    void *speex;
00096    SpeexBits bits;
00097    int framesize;
00098    int silent_state;
00099 #ifdef _SPEEX_TYPES_H
00100    SpeexPreprocessState *pp;
00101    spx_int16_t buf[BUFFER_SAMPLES];
00102 #else
00103    int16_t buf[BUFFER_SAMPLES];  /* input, waiting to be compressed */
00104 #endif
00105 };
00106 
00107 
00108 static int lintospeex_new(struct ast_trans_pvt *pvt)
00109 {
00110    struct speex_coder_pvt *tmp = pvt->pvt;
00111 
00112    if (!(tmp->speex = speex_encoder_init(&speex_nb_mode)))
00113       return -1;
00114 
00115    speex_bits_init(&tmp->bits);
00116    speex_bits_reset(&tmp->bits);
00117    speex_encoder_ctl(tmp->speex, SPEEX_GET_FRAME_SIZE, &tmp->framesize);
00118    speex_encoder_ctl(tmp->speex, SPEEX_SET_COMPLEXITY, &complexity);
00119 #ifdef _SPEEX_TYPES_H
00120    if (preproc) {
00121       tmp->pp = speex_preprocess_state_init(tmp->framesize, 8000); /* XXX what is this 8000 ? */
00122       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_VAD, &pp_vad);
00123       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_AGC, &pp_agc);
00124       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_AGC_LEVEL, &pp_agc_level);
00125       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DENOISE, &pp_denoise);
00126       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB, &pp_dereverb);
00127       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB_DECAY, &pp_dereverb_decay);
00128       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB_LEVEL, &pp_dereverb_level);
00129    }
00130 #endif
00131    if (!abr && !vbr) {
00132       speex_encoder_ctl(tmp->speex, SPEEX_SET_QUALITY, &quality);
00133       if (vad)
00134          speex_encoder_ctl(tmp->speex, SPEEX_SET_VAD, &vad);
00135    }
00136    if (vbr) {
00137       speex_encoder_ctl(tmp->speex, SPEEX_SET_VBR, &vbr);
00138       speex_encoder_ctl(tmp->speex, SPEEX_SET_VBR_QUALITY, &vbr_quality);
00139    }
00140    if (abr)
00141       speex_encoder_ctl(tmp->speex, SPEEX_SET_ABR, &abr);
00142    if (dtx)
00143       speex_encoder_ctl(tmp->speex, SPEEX_SET_DTX, &dtx); 
00144    tmp->silent_state = 0;
00145 
00146    return 0;
00147 }
00148 
00149 static int speextolin_new(struct ast_trans_pvt *pvt)
00150 {
00151    struct speex_coder_pvt *tmp = pvt->pvt;
00152    
00153    if (!(tmp->speex = speex_decoder_init(&speex_nb_mode)))
00154       return -1;
00155 
00156    speex_bits_init(&tmp->bits);
00157    speex_decoder_ctl(tmp->speex, SPEEX_GET_FRAME_SIZE, &tmp->framesize);
00158    if (enhancement)
00159       speex_decoder_ctl(tmp->speex, SPEEX_SET_ENH, &enhancement);
00160 
00161    return 0;
00162 }
00163 
00164 static struct ast_frame *lintospeex_sample(void)
00165 {
00166    static struct ast_frame f;
00167    f.frametype = AST_FRAME_VOICE;
00168    f.subclass = AST_FORMAT_SLINEAR;
00169    f.datalen = sizeof(slin_speex_ex);
00170    /* Assume 8000 Hz */
00171    f.samples = sizeof(slin_speex_ex)/2;
00172    f.mallocd = 0;
00173    f.offset = 0;
00174    f.src = __PRETTY_FUNCTION__;
00175    f.data = slin_speex_ex;
00176    return &f;
00177 }
00178 
00179 static struct ast_frame *speextolin_sample(void)
00180 {
00181    static struct ast_frame f;
00182    f.frametype = AST_FRAME_VOICE;
00183    f.subclass = AST_FORMAT_SPEEX;
00184    f.datalen = sizeof(speex_slin_ex);
00185    /* All frames are 20 ms long */
00186    f.samples = SPEEX_SAMPLES;
00187    f.mallocd = 0;
00188    f.offset = 0;
00189    f.src = __PRETTY_FUNCTION__;
00190    f.data = speex_slin_ex;
00191    return &f;
00192 }
00193 
00194 /*! \brief convert and store into outbuf */
00195 static int speextolin_framein(struct ast_trans_pvt *pvt, struct ast_frame *f)
00196 {
00197    struct speex_coder_pvt *tmp = pvt->pvt;
00198 
00199    /* Assuming there's space left, decode into the current buffer at
00200       the tail location.  Read in as many frames as there are */
00201    int x;
00202    int res;
00203    int16_t *dst = (int16_t *)pvt->outbuf;
00204    /* XXX fout is a temporary buffer, may have different types */
00205 #ifdef _SPEEX_TYPES_H
00206    spx_int16_t fout[1024];
00207 #else
00208    float fout[1024];
00209 #endif
00210 
00211    if (f->datalen == 0) {  /* Native PLC interpolation */
00212       if (pvt->samples + tmp->framesize > BUFFER_SAMPLES) {
00213          ast_log(LOG_WARNING, "Out of buffer space\n");
00214          return -1;
00215       }
00216 #ifdef _SPEEX_TYPES_H
00217       speex_decode_int(tmp->speex, NULL, dst + pvt->samples);
00218 #else
00219       speex_decode(tmp->speex, NULL, fout);
00220       for (x=0;x<tmp->framesize;x++) {
00221          dst[pvt->samples + x] = (int16_t)fout[x];
00222       }
00223 #endif
00224       pvt->samples += tmp->framesize;
00225       pvt->datalen += 2 * tmp->framesize; /* 2 bytes/sample */
00226       return 0;
00227    }
00228 
00229    /* Read in bits */
00230    speex_bits_read_from(&tmp->bits, f->data, f->datalen);
00231    for (;;) {
00232 #ifdef _SPEEX_TYPES_H
00233       res = speex_decode_int(tmp->speex, &tmp->bits, fout);
00234 #else
00235       res = speex_decode(tmp->speex, &tmp->bits, fout);
00236 #endif
00237       if (res < 0)
00238          break;
00239       if (pvt->samples + tmp->framesize > BUFFER_SAMPLES) {
00240          ast_log(LOG_WARNING, "Out of buffer space\n");
00241          return -1;
00242       }
00243       for (x = 0 ; x < tmp->framesize; x++)
00244          dst[pvt->samples + x] = (int16_t)fout[x];
00245       pvt->samples += tmp->framesize;
00246       pvt->datalen += 2 * tmp->framesize; /* 2 bytes/sample */
00247    }
00248    return 0;
00249 }
00250 
00251 /*! \brief store input frame in work buffer */
00252 static int lintospeex_framein(struct ast_trans_pvt *pvt, struct ast_frame *f)
00253 {
00254    struct speex_coder_pvt *tmp = pvt->pvt;
00255 
00256    /* XXX We should look at how old the rest of our stream is, and if it
00257       is too old, then we should overwrite it entirely, otherwise we can
00258       get artifacts of earlier talk that do not belong */
00259    memcpy(tmp->buf + pvt->samples, f->data, f->datalen);
00260    pvt->samples += f->samples;
00261    return 0;
00262 }
00263 
00264 /*! \brief convert work buffer and produce output frame */
00265 static struct ast_frame *lintospeex_frameout(struct ast_trans_pvt *pvt)
00266 {
00267    struct speex_coder_pvt *tmp = pvt->pvt;
00268    int is_speech=1;
00269    int datalen = 0;  /* output bytes */
00270    int samples = 0;  /* output samples */
00271 
00272    /* We can't work on anything less than a frame in size */
00273    if (pvt->samples < tmp->framesize)
00274       return NULL;
00275    speex_bits_reset(&tmp->bits);
00276    while (pvt->samples >= tmp->framesize) {
00277 #ifdef _SPEEX_TYPES_H
00278       /* Preprocess audio */
00279       if (preproc)
00280          is_speech = speex_preprocess(tmp->pp, tmp->buf + samples, NULL);
00281       /* Encode a frame of data */
00282       if (is_speech) {
00283          /* If DTX enabled speex_encode returns 0 during silence */
00284          is_speech = speex_encode_int(tmp->speex, tmp->buf + samples, &tmp->bits) || !dtx;
00285       } else {
00286          /* 5 zeros interpreted by Speex as silence (submode 0) */
00287          speex_bits_pack(&tmp->bits, 0, 5);
00288       }
00289 #else
00290       {
00291          float fbuf[1024];
00292          int x;
00293          /* Convert to floating point */
00294          for (x = 0; x < tmp->framesize; x++)
00295             fbuf[x] = tmp->buf[samples + x];
00296          /* Encode a frame of data */
00297          is_speech = speex_encode(tmp->speex, fbuf, &tmp->bits) || !dtx;
00298       }
00299 #endif
00300       samples += tmp->framesize;
00301       pvt->samples -= tmp->framesize;
00302    }
00303 
00304    /* Move the data at the end of the buffer to the front */
00305    if (pvt->samples)
00306       memmove(tmp->buf, tmp->buf + samples, pvt->samples * 2);
00307 
00308    /* Use AST_FRAME_CNG to signify the start of any silence period */
00309    if (is_speech) {
00310       tmp->silent_state = 0;
00311    } else {
00312       if (tmp->silent_state) {
00313          return NULL;
00314       } else {
00315          tmp->silent_state = 1;
00316          speex_bits_reset(&tmp->bits);
00317          memset(&pvt->f, 0, sizeof(pvt->f));
00318          pvt->f.frametype = AST_FRAME_CNG;
00319          pvt->f.samples = samples;
00320          /* XXX what now ? format etc... */
00321       }
00322    }
00323 
00324    /* Terminate bit stream */
00325    speex_bits_pack(&tmp->bits, 15, 5);
00326    datalen = speex_bits_write(&tmp->bits, pvt->outbuf, pvt->t->buf_size);
00327    return ast_trans_frameout(pvt, datalen, samples);
00328 }
00329 
00330 static void speextolin_destroy(struct ast_trans_pvt *arg)
00331 {
00332    struct speex_coder_pvt *pvt = arg->pvt;
00333 
00334    speex_decoder_destroy(pvt->speex);
00335    speex_bits_destroy(&pvt->bits);
00336 }
00337 
00338 static void lintospeex_destroy(struct ast_trans_pvt *arg)
00339 {
00340    struct speex_coder_pvt *pvt = arg->pvt;
00341 #ifdef _SPEEX_TYPES_H
00342    if (preproc)
00343       speex_preprocess_state_destroy(pvt->pp);
00344 #endif
00345    speex_encoder_destroy(pvt->speex);
00346    speex_bits_destroy(&pvt->bits);
00347 }
00348 
00349 static struct ast_translator speextolin = {
00350    .name = "speextolin", 
00351    .srcfmt = AST_FORMAT_SPEEX,
00352    .dstfmt =  AST_FORMAT_SLINEAR,
00353    .newpvt = speextolin_new,
00354    .framein = speextolin_framein,
00355    .destroy = speextolin_destroy,
00356    .sample = speextolin_sample,
00357    .desc_size = sizeof(struct speex_coder_pvt),
00358    .buffer_samples = BUFFER_SAMPLES,
00359    .buf_size = BUFFER_SAMPLES * 2,
00360    .native_plc = 1,
00361 };
00362 
00363 static struct ast_translator lintospeex = {
00364    .name = "lintospeex", 
00365    .srcfmt = AST_FORMAT_SLINEAR,
00366    .dstfmt = AST_FORMAT_SPEEX,
00367    .newpvt = lintospeex_new,
00368    .framein = lintospeex_framein,
00369    .frameout = lintospeex_frameout,
00370    .destroy = lintospeex_destroy,
00371    .sample = lintospeex_sample,
00372    .desc_size = sizeof(struct speex_coder_pvt),
00373    .buffer_samples = BUFFER_SAMPLES,
00374    .buf_size = BUFFER_SAMPLES * 2, /* XXX maybe a lot less ? */
00375 };
00376 
00377 static void parse_config(void) 
00378 {
00379    struct ast_config *cfg = ast_config_load("codecs.conf");
00380    struct ast_variable *var;
00381    int res;
00382    float res_f;
00383 
00384    if (cfg == NULL)
00385       return;
00386 
00387    for (var = ast_variable_browse(cfg, "speex"); var; var = var->next) {
00388       if (!strcasecmp(var->name, "quality")) {
00389          res = abs(atoi(var->value));
00390          if (res > -1 && res < 11) {
00391             if (option_verbose > 2)
00392                ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting Quality to %d\n",res);
00393             quality = res;
00394          } else 
00395             ast_log(LOG_ERROR,"Error Quality must be 0-10\n");
00396       } else if (!strcasecmp(var->name, "complexity")) {
00397          res = abs(atoi(var->value));
00398          if (res > -1 && res < 11) {
00399             if (option_verbose > 2)
00400                ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting Complexity to %d\n",res);
00401             complexity = res;
00402          } else 
00403             ast_log(LOG_ERROR,"Error! Complexity must be 0-10\n");
00404       } else if (!strcasecmp(var->name, "vbr_quality")) {
00405          if (sscanf(var->value, "%30f", &res_f) == 1 && res_f >= 0 && res_f <= 10) {
00406             if (option_verbose > 2)
00407                ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting VBR Quality to %f\n",res_f);
00408             vbr_quality = res_f;
00409          } else
00410             ast_log(LOG_ERROR,"Error! VBR Quality must be 0-10\n");
00411       } else if (!strcasecmp(var->name, "abr_quality")) {
00412          ast_log(LOG_ERROR,"Error! ABR Quality setting obsolete, set ABR to desired bitrate\n");
00413       } else if (!strcasecmp(var->name, "enhancement")) {
00414          enhancement = ast_true(var->value) ? 1 : 0;
00415          if (option_verbose > 2)
00416             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Perceptual Enhancement Mode. [%s]\n",enhancement ? "on" : "off");
00417       } else if (!strcasecmp(var->name, "vbr")) {
00418          vbr = ast_true(var->value) ? 1 : 0;
00419          if (option_verbose > 2)
00420             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: VBR Mode. [%s]\n",vbr ? "on" : "off");
00421       } else if (!strcasecmp(var->name, "abr")) {
00422          res = abs(atoi(var->value));
00423          if (res >= 0) {
00424             if (option_verbose > 2) {
00425                if (res > 0)
00426                   ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting ABR target bitrate to %d\n",res);
00427                else
00428                   ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Disabling ABR\n");
00429             }
00430             abr = res;
00431          } else 
00432             ast_log(LOG_ERROR,"Error! ABR target bitrate must be >= 0\n");
00433       } else if (!strcasecmp(var->name, "vad")) {
00434          vad = ast_true(var->value) ? 1 : 0;
00435          if (option_verbose > 2)
00436             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: VAD Mode. [%s]\n",vad ? "on" : "off");
00437       } else if (!strcasecmp(var->name, "dtx")) {
00438          dtx = ast_true(var->value) ? 1 : 0;
00439          if (option_verbose > 2)
00440             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: DTX Mode. [%s]\n",dtx ? "on" : "off");
00441       } else if (!strcasecmp(var->name, "preprocess")) {
00442          preproc = ast_true(var->value) ? 1 : 0;
00443          if (option_verbose > 2)
00444             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Preprocessing. [%s]\n",preproc ? "on" : "off");
00445       } else if (!strcasecmp(var->name, "pp_vad")) {
00446          pp_vad = ast_true(var->value) ? 1 : 0;
00447          if (option_verbose > 2)
00448             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Preprocessor VAD. [%s]\n",pp_vad ? "on" : "off");
00449       } else if (!strcasecmp(var->name, "pp_agc")) {
00450          pp_agc = ast_true(var->value) ? 1 : 0;
00451          if (option_verbose > 2)
00452             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Preprocessor AGC. [%s]\n",pp_agc ? "on" : "off");
00453       } else if (!strcasecmp(var->name, "pp_agc_level")) {
00454          if (sscanf(var->value, "%30f", &res_f) == 1 && res_f >= 0) {
00455             if (option_verbose > 2)
00456                ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting preprocessor AGC Level to %f\n",res_f);
00457             pp_agc_level = res_f;
00458          } else
00459             ast_log(LOG_ERROR,"Error! Preprocessor AGC Level must be >= 0\n");
00460       } else if (!strcasecmp(var->name, "pp_denoise")) {
00461          pp_denoise = ast_true(var->value) ? 1 : 0;
00462          if (option_verbose > 2)
00463             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Preprocessor Denoise. [%s]\n",pp_denoise ? "on" : "off");
00464       } else if (!strcasecmp(var->name, "pp_dereverb")) {
00465          pp_dereverb = ast_true(var->value) ? 1 : 0;
00466          if (option_verbose > 2)
00467             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Preprocessor Dereverb. [%s]\n",pp_dereverb ? "on" : "off");
00468       } else if (!strcasecmp(var->name, "pp_dereverb_decay")) {
00469          if (sscanf(var->value, "%30f", &res_f) == 1 && res_f >= 0) {
00470             if (option_verbose > 2)
00471                ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting preprocessor Dereverb Decay to %f\n",res_f);
00472             pp_dereverb_decay = res_f;
00473          } else
00474             ast_log(LOG_ERROR,"Error! Preprocessor Dereverb Decay must be >= 0\n");
00475       } else if (!strcasecmp(var->name, "pp_dereverb_level")) {
00476          if (sscanf(var->value, "%30f", &res_f) == 1 && res_f >= 0) {
00477             if (option_verbose > 2)
00478                ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting preprocessor Dereverb Level to %f\n",res_f);
00479             pp_dereverb_level = res_f;
00480          } else
00481             ast_log(LOG_ERROR,"Error! Preprocessor Dereverb Level must be >= 0\n");
00482       }
00483    }
00484    ast_config_destroy(cfg);
00485 }
00486 
/*! \brief Module reload hook: re-read the Speex settings from codecs.conf
 *
 * \return always 0
 */
static int reload(void)
{
   parse_config();
   return 0;
}
00493 
00494 static int unload_module(void)
00495 {
00496    int res;
00497 
00498    res = ast_unregister_translator(&lintospeex);
00499    res |= ast_unregister_translator(&speextolin);
00500 
00501    return res;
00502 }
00503 
00504 static int load_module(void)
00505 {
00506    int res;
00507 
00508    parse_config();
00509    res=ast_register_translator(&speextolin);
00510    if (!res) 
00511       res=ast_register_translator(&lintospeex);
00512    else
00513       ast_unregister_translator(&speextolin);
00514 
00515    return res;
00516 }
00517 
00518 AST_MODULE_INFO(ASTERISK_GPL_KEY, AST_MODFLAG_DEFAULT, "Speex Coder/Decoder",
00519       .load = load_module,
00520       .unload = unload_module,
00521       .reload = reload,
00522           );

Generated on Sat Aug 6 00:39:28 2011 for Asterisk - the Open Source PBX by  doxygen 1.4.7