Wed Apr 6 11:29:44 2011

Asterisk developer's documentation


codec_speex.c

Go to the documentation of this file.
00001 /*
00002  * Asterisk -- An open source telephony toolkit.
00003  *
00004  * Copyright (C) 1999 - 2005, Digium, Inc.
00005  *
00006  * Mark Spencer <markster@digium.com>
00007  *
00008  *
00009  * See http://www.asterisk.org for more information about
00010  * the Asterisk project. Please do not directly contact
00011  * any of the maintainers of this project for assistance;
00012  * the project provides a web site, mailing lists and IRC
00013  * channels for your use.
00014  *
00015  * This program is free software, distributed under the terms of
00016  * the GNU General Public License Version 2. See the LICENSE file
00017  * at the top of the source tree.
00018  */
00019 
00020 /*! \file
00021  *
00022  * \brief Translate between signed linear and Speex (Open Codec)
00023  *
00024  * \note This work was motivated by Jeremy McNamara 
00025  * hacked to be configurable by anthm and bkw 9/28/2004
00026  *
00027  * \ingroup codecs
00028  *
00029  * \extref The Speex library - http://www.speex.org
00030  *
00031  */
00032 
00033 /*** MODULEINFO
00034    <depend>speex</depend>
00035    <depend>speex_preprocess</depend>
00036    <use>speexdsp</use>
00037  ***/
00038 
00039 #include "asterisk.h"
00040 
00041 ASTERISK_FILE_VERSION(__FILE__, "$Revision: 271625 $")
00042 
00043 #include <speex/speex.h>
00044 
00045 /* We require a post 1.1.8 version of Speex to enable preprocessing
00046    and better type handling */   
00047 #ifdef _SPEEX_TYPES_H
00048 #include <speex/speex_preprocess.h>
00049 #endif
00050 
00051 #include "asterisk/translate.h"
00052 #include "asterisk/module.h"
00053 #include "asterisk/config.h"
00054 #include "asterisk/utils.h"
00055 
/*
 * Codec settings.  The values below are the built-in defaults;
 * parse_config() overwrites them from the [speex] section of codecs.conf.
 */

/* Encoder / decoder options */
static int quality = 3;          /* SPEEX_SET_QUALITY, applied only when VBR and ABR are both off */
static int complexity = 2;       /* SPEEX_SET_COMPLEXITY */
static int enhancement = 0;      /* non-zero: decoder is given SPEEX_SET_ENH */
static int vad = 0;              /* non-zero: SPEEX_SET_VAD (only when VBR and ABR are both off) */
static int vbr = 0;              /* non-zero: SPEEX_SET_VBR plus vbr_quality */
static float vbr_quality = 4;    /* SPEEX_SET_VBR_QUALITY */
static int abr = 0;              /* non-zero: value handed to SPEEX_SET_ABR */
static int dtx = 0;              /* set to 1 to enable silence detection */

/* Preprocessor options (only consulted when built with _SPEEX_TYPES_H) */
static int preproc = 0;          /* master switch for the preprocessor */
static int pp_vad = 0;
static int pp_agc = 0;
static float pp_agc_level = 8000; /* XXX what is this 8000 ? */
static int pp_denoise = 0;
static int pp_dereverb = 0;
static float pp_dereverb_decay = 0.4;
static float pp_dereverb_level = 0.3;
00074 
/* Frame type tags (two-bit field, see TYPE_MASK) */
#define TYPE_HIGH     0x0
#define TYPE_LOW      0x1
#define TYPE_SILENCE  0x2
#define TYPE_MASK     0x3

/* Capacity, in samples, of the per-instance work buffers */
#define BUFFER_SAMPLES  8000
#define SPEEX_SAMPLES   160
00082 
00083 /* Sample frame data */
00084 #include "asterisk/slin.h"
00085 #include "ex_speex.h"
00086 
00087 struct speex_coder_pvt {
00088    void *speex;
00089    SpeexBits bits;
00090    int framesize;
00091    int silent_state;
00092 #ifdef _SPEEX_TYPES_H
00093    SpeexPreprocessState *pp;
00094    spx_int16_t buf[BUFFER_SAMPLES];
00095 #else
00096    int16_t buf[BUFFER_SAMPLES];  /* input, waiting to be compressed */
00097 #endif
00098 };
00099 
00100 static int speex_encoder_construct(struct ast_trans_pvt *pvt, const SpeexMode *profile, int sampling_rate)
00101 {
00102    struct speex_coder_pvt *tmp = pvt->pvt;
00103 
00104    if (!(tmp->speex = speex_encoder_init(profile)))
00105       return -1;
00106 
00107    speex_bits_init(&tmp->bits);
00108    speex_bits_reset(&tmp->bits);
00109    speex_encoder_ctl(tmp->speex, SPEEX_GET_FRAME_SIZE, &tmp->framesize);
00110    speex_encoder_ctl(tmp->speex, SPEEX_SET_COMPLEXITY, &complexity);
00111 #ifdef _SPEEX_TYPES_H
00112    if (preproc) {
00113       tmp->pp = speex_preprocess_state_init(tmp->framesize, sampling_rate);
00114       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_VAD, &pp_vad);
00115       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_AGC, &pp_agc);
00116       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_AGC_LEVEL, &pp_agc_level);
00117       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DENOISE, &pp_denoise);
00118       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB, &pp_dereverb);
00119       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB_DECAY, &pp_dereverb_decay);
00120       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB_LEVEL, &pp_dereverb_level);
00121    }
00122 #endif
00123    if (!abr && !vbr) {
00124       speex_encoder_ctl(tmp->speex, SPEEX_SET_QUALITY, &quality);
00125       if (vad)
00126          speex_encoder_ctl(tmp->speex, SPEEX_SET_VAD, &vad);
00127    }
00128    if (vbr) {
00129       speex_encoder_ctl(tmp->speex, SPEEX_SET_VBR, &vbr);
00130       speex_encoder_ctl(tmp->speex, SPEEX_SET_VBR_QUALITY, &vbr_quality);
00131    }
00132    if (abr)
00133       speex_encoder_ctl(tmp->speex, SPEEX_SET_ABR, &abr);
00134    if (dtx)
00135       speex_encoder_ctl(tmp->speex, SPEEX_SET_DTX, &dtx); 
00136    tmp->silent_state = 0;
00137 
00138    return 0;
00139 }
00140 
00141 static int lintospeex_new(struct ast_trans_pvt *pvt)
00142 {
00143    return speex_encoder_construct(pvt, &speex_nb_mode, 8000);
00144 }
00145 
00146 static int lin16tospeexwb_new(struct ast_trans_pvt *pvt)
00147 {
00148    return speex_encoder_construct(pvt, &speex_wb_mode, 16000);
00149 }
00150 
00151 static int speex_decoder_construct(struct ast_trans_pvt *pvt, const SpeexMode *profile)
00152 {
00153    struct speex_coder_pvt *tmp = pvt->pvt;
00154    
00155    if (!(tmp->speex = speex_decoder_init(profile)))
00156       return -1;
00157 
00158    speex_bits_init(&tmp->bits);
00159    speex_decoder_ctl(tmp->speex, SPEEX_GET_FRAME_SIZE, &tmp->framesize);
00160    if (enhancement)
00161       speex_decoder_ctl(tmp->speex, SPEEX_SET_ENH, &enhancement);
00162 
00163    return 0;
00164 }
00165 
00166 static int speextolin_new(struct ast_trans_pvt *pvt)
00167 {
00168    return speex_decoder_construct(pvt, &speex_nb_mode);
00169 }
00170 
00171 static int speexwbtolin16_new(struct ast_trans_pvt *pvt)
00172 {
00173    return speex_decoder_construct(pvt, &speex_wb_mode);
00174 }
00175 
00176 /*! \brief convert and store into outbuf */
00177 static int speextolin_framein(struct ast_trans_pvt *pvt, struct ast_frame *f)
00178 {
00179    struct speex_coder_pvt *tmp = pvt->pvt;
00180 
00181    /* Assuming there's space left, decode into the current buffer at
00182       the tail location.  Read in as many frames as there are */
00183    int x;
00184    int res;
00185    int16_t *dst = pvt->outbuf.i16;
00186    /* XXX fout is a temporary buffer, may have different types */
00187 #ifdef _SPEEX_TYPES_H
00188    spx_int16_t fout[1024];
00189 #else
00190    float fout[1024];
00191 #endif
00192 
00193    if (f->datalen == 0) {  /* Native PLC interpolation */
00194       if (pvt->samples + tmp->framesize > BUFFER_SAMPLES) {
00195          ast_log(LOG_WARNING, "Out of buffer space\n");
00196          return -1;
00197       }
00198 #ifdef _SPEEX_TYPES_H
00199       speex_decode_int(tmp->speex, NULL, dst + pvt->samples);
00200 #else
00201       speex_decode(tmp->speex, NULL, fout);
00202       for (x=0;x<tmp->framesize;x++) {
00203          dst[pvt->samples + x] = (int16_t)fout[x];
00204       }
00205 #endif
00206       pvt->samples += tmp->framesize;
00207       pvt->datalen += 2 * tmp->framesize; /* 2 bytes/sample */
00208       return 0;
00209    }
00210 
00211    /* Read in bits */
00212    speex_bits_read_from(&tmp->bits, f->data.ptr, f->datalen);
00213    for (;;) {
00214 #ifdef _SPEEX_TYPES_H
00215       res = speex_decode_int(tmp->speex, &tmp->bits, fout);
00216 #else
00217       res = speex_decode(tmp->speex, &tmp->bits, fout);
00218 #endif
00219       if (res < 0)
00220          break;
00221       if (pvt->samples + tmp->framesize > BUFFER_SAMPLES) {
00222          ast_log(LOG_WARNING, "Out of buffer space\n");
00223          return -1;
00224       }
00225       for (x = 0 ; x < tmp->framesize; x++)
00226          dst[pvt->samples + x] = (int16_t)fout[x];
00227       pvt->samples += tmp->framesize;
00228       pvt->datalen += 2 * tmp->framesize; /* 2 bytes/sample */
00229    }
00230    return 0;
00231 }
00232 
00233 /*! \brief store input frame in work buffer */
00234 static int lintospeex_framein(struct ast_trans_pvt *pvt, struct ast_frame *f)
00235 {
00236    struct speex_coder_pvt *tmp = pvt->pvt;
00237 
00238    /* XXX We should look at how old the rest of our stream is, and if it
00239       is too old, then we should overwrite it entirely, otherwise we can
00240       get artifacts of earlier talk that do not belong */
00241    memcpy(tmp->buf + pvt->samples, f->data.ptr, f->datalen);
00242    pvt->samples += f->samples;
00243    return 0;
00244 }
00245 
00246 /*! \brief convert work buffer and produce output frame */
00247 static struct ast_frame *lintospeex_frameout(struct ast_trans_pvt *pvt)
00248 {
00249    struct speex_coder_pvt *tmp = pvt->pvt;
00250    int is_speech=1;
00251    int datalen = 0;  /* output bytes */
00252    int samples = 0;  /* output samples */
00253 
00254    /* We can't work on anything less than a frame in size */
00255    if (pvt->samples < tmp->framesize)
00256       return NULL;
00257    speex_bits_reset(&tmp->bits);
00258    while (pvt->samples >= tmp->framesize) {
00259 #ifdef _SPEEX_TYPES_H
00260       /* Preprocess audio */
00261       if (preproc)
00262          is_speech = speex_preprocess(tmp->pp, tmp->buf + samples, NULL);
00263       /* Encode a frame of data */
00264       if (is_speech) {
00265          /* If DTX enabled speex_encode returns 0 during silence */
00266          is_speech = speex_encode_int(tmp->speex, tmp->buf + samples, &tmp->bits) || !dtx;
00267       } else {
00268          /* 5 zeros interpreted by Speex as silence (submode 0) */
00269          speex_bits_pack(&tmp->bits, 0, 5);
00270       }
00271 #else
00272       {
00273          float fbuf[1024];
00274          int x;
00275          /* Convert to floating point */
00276          for (x = 0; x < tmp->framesize; x++)
00277             fbuf[x] = tmp->buf[samples + x];
00278          /* Encode a frame of data */
00279          is_speech = speex_encode(tmp->speex, fbuf, &tmp->bits) || !dtx;
00280       }
00281 #endif
00282       samples += tmp->framesize;
00283       pvt->samples -= tmp->framesize;
00284    }
00285 
00286    /* Move the data at the end of the buffer to the front */
00287    if (pvt->samples)
00288       memmove(tmp->buf, tmp->buf + samples, pvt->samples * 2);
00289 
00290    /* Use AST_FRAME_CNG to signify the start of any silence period */
00291    if (is_speech) {
00292       tmp->silent_state = 0;
00293    } else {
00294       if (tmp->silent_state) {
00295          return NULL;
00296       } else {
00297          tmp->silent_state = 1;
00298          speex_bits_reset(&tmp->bits);
00299          memset(&pvt->f, 0, sizeof(pvt->f));
00300          pvt->f.frametype = AST_FRAME_CNG;
00301          pvt->f.samples = samples;
00302          /* XXX what now ? format etc... */
00303       }
00304    }
00305 
00306    /* Terminate bit stream */
00307    speex_bits_pack(&tmp->bits, 15, 5);
00308    datalen = speex_bits_write(&tmp->bits, pvt->outbuf.c, pvt->t->buf_size);
00309    return ast_trans_frameout(pvt, datalen, samples);
00310 }
00311 
00312 static void speextolin_destroy(struct ast_trans_pvt *arg)
00313 {
00314    struct speex_coder_pvt *pvt = arg->pvt;
00315 
00316    speex_decoder_destroy(pvt->speex);
00317    speex_bits_destroy(&pvt->bits);
00318 }
00319 
00320 static void lintospeex_destroy(struct ast_trans_pvt *arg)
00321 {
00322    struct speex_coder_pvt *pvt = arg->pvt;
00323 #ifdef _SPEEX_TYPES_H
00324    if (preproc)
00325       speex_preprocess_state_destroy(pvt->pp);
00326 #endif
00327    speex_encoder_destroy(pvt->speex);
00328    speex_bits_destroy(&pvt->bits);
00329 }
00330 
00331 static struct ast_translator speextolin = {
00332    .name = "speextolin", 
00333    .srcfmt = AST_FORMAT_SPEEX,
00334    .dstfmt =  AST_FORMAT_SLINEAR,
00335    .newpvt = speextolin_new,
00336    .framein = speextolin_framein,
00337    .destroy = speextolin_destroy,
00338    .sample = speex_sample,
00339    .desc_size = sizeof(struct speex_coder_pvt),
00340    .buffer_samples = BUFFER_SAMPLES,
00341    .buf_size = BUFFER_SAMPLES * 2,
00342    .native_plc = 1,
00343 };
00344 
00345 static struct ast_translator lintospeex = {
00346    .name = "lintospeex", 
00347    .srcfmt = AST_FORMAT_SLINEAR,
00348    .dstfmt = AST_FORMAT_SPEEX,
00349    .newpvt = lintospeex_new,
00350    .framein = lintospeex_framein,
00351    .frameout = lintospeex_frameout,
00352    .destroy = lintospeex_destroy,
00353    .sample = slin8_sample,
00354    .desc_size = sizeof(struct speex_coder_pvt),
00355    .buffer_samples = BUFFER_SAMPLES,
00356    .buf_size = BUFFER_SAMPLES * 2, /* XXX maybe a lot less ? */
00357 };
00358 
00359 static struct ast_translator speexwbtolin16 = {
00360    .name = "speexwbtolin16", 
00361    .srcfmt = AST_FORMAT_SPEEX16,
00362    .dstfmt =  AST_FORMAT_SLINEAR16,
00363    .newpvt = speexwbtolin16_new,
00364    .framein = speextolin_framein,
00365    .destroy = speextolin_destroy,
00366    .sample = speex16_sample,
00367    .desc_size = sizeof(struct speex_coder_pvt),
00368    .buffer_samples = BUFFER_SAMPLES,
00369    .buf_size = BUFFER_SAMPLES * 2,
00370    .native_plc = 1,
00371 };
00372 
00373 static struct ast_translator lin16tospeexwb = {
00374    .name = "lin16tospeexwb", 
00375    .srcfmt = AST_FORMAT_SLINEAR16,
00376    .dstfmt = AST_FORMAT_SPEEX16,
00377    .newpvt = lin16tospeexwb_new,
00378    .framein = lintospeex_framein,
00379    .frameout = lintospeex_frameout,
00380    .destroy = lintospeex_destroy,
00381    .sample = slin16_sample,
00382    .desc_size = sizeof(struct speex_coder_pvt),
00383    .buffer_samples = BUFFER_SAMPLES,
00384    .buf_size = BUFFER_SAMPLES * 2, /* XXX maybe a lot less ? */
00385 };
00386 
00387 static int parse_config(int reload) 
00388 {
00389    struct ast_flags config_flags = { reload ? CONFIG_FLAG_FILEUNCHANGED : 0 };
00390    struct ast_config *cfg = ast_config_load("codecs.conf", config_flags);
00391    struct ast_variable *var;
00392    int res;
00393    float res_f;
00394 
00395    if (cfg == CONFIG_STATUS_FILEMISSING || cfg == CONFIG_STATUS_FILEUNCHANGED || cfg == CONFIG_STATUS_FILEINVALID)
00396       return 0;
00397 
00398    for (var = ast_variable_browse(cfg, "speex"); var; var = var->next) {
00399       if (!strcasecmp(var->name, "quality")) {
00400          res = abs(atoi(var->value));
00401          if (res > -1 && res < 11) {
00402             ast_verb(3, "CODEC SPEEX: Setting Quality to %d\n",res);
00403             quality = res;
00404          } else 
00405             ast_log(LOG_ERROR,"Error Quality must be 0-10\n");
00406       } else if (!strcasecmp(var->name, "complexity")) {
00407          res = abs(atoi(var->value));
00408          if (res > -1 && res < 11) {
00409             ast_verb(3, "CODEC SPEEX: Setting Complexity to %d\n",res);
00410             complexity = res;
00411          } else 
00412             ast_log(LOG_ERROR,"Error! Complexity must be 0-10\n");
00413       } else if (!strcasecmp(var->name, "vbr_quality")) {
00414          if (sscanf(var->value, "%30f", &res_f) == 1 && res_f >= 0 && res_f <= 10) {
00415             ast_verb(3, "CODEC SPEEX: Setting VBR Quality to %f\n",res_f);
00416             vbr_quality = res_f;
00417          } else
00418             ast_log(LOG_ERROR,"Error! VBR Quality must be 0-10\n");
00419       } else if (!strcasecmp(var->name, "abr_quality")) {
00420          ast_log(LOG_ERROR,"Error! ABR Quality setting obsolete, set ABR to desired bitrate\n");
00421       } else if (!strcasecmp(var->name, "enhancement")) {
00422          enhancement = ast_true(var->value) ? 1 : 0;
00423          ast_verb(3, "CODEC SPEEX: Perceptual Enhancement Mode. [%s]\n",enhancement ? "on" : "off");
00424       } else if (!strcasecmp(var->name, "vbr")) {
00425          vbr = ast_true(var->value) ? 1 : 0;
00426          ast_verb(3, "CODEC SPEEX: VBR Mode. [%s]\n",vbr ? "on" : "off");
00427       } else if (!strcasecmp(var->name, "abr")) {
00428          res = abs(atoi(var->value));
00429          if (res >= 0) {
00430                if (res > 0)
00431                ast_verb(3, "CODEC SPEEX: Setting ABR target bitrate to %d\n",res);
00432                else
00433                ast_verb(3, "CODEC SPEEX: Disabling ABR\n");
00434             abr = res;
00435          } else 
00436             ast_log(LOG_ERROR,"Error! ABR target bitrate must be >= 0\n");
00437       } else if (!strcasecmp(var->name, "vad")) {
00438          vad = ast_true(var->value) ? 1 : 0;
00439          ast_verb(3, "CODEC SPEEX: VAD Mode. [%s]\n",vad ? "on" : "off");
00440       } else if (!strcasecmp(var->name, "dtx")) {
00441          dtx = ast_true(var->value) ? 1 : 0;
00442          ast_verb(3, "CODEC SPEEX: DTX Mode. [%s]\n",dtx ? "on" : "off");
00443       } else if (!strcasecmp(var->name, "preprocess")) {
00444          preproc = ast_true(var->value) ? 1 : 0;
00445          ast_verb(3, "CODEC SPEEX: Preprocessing. [%s]\n",preproc ? "on" : "off");
00446       } else if (!strcasecmp(var->name, "pp_vad")) {
00447          pp_vad = ast_true(var->value) ? 1 : 0;
00448          ast_verb(3, "CODEC SPEEX: Preprocessor VAD. [%s]\n",pp_vad ? "on" : "off");
00449       } else if (!strcasecmp(var->name, "pp_agc")) {
00450          pp_agc = ast_true(var->value) ? 1 : 0;
00451          ast_verb(3, "CODEC SPEEX: Preprocessor AGC. [%s]\n",pp_agc ? "on" : "off");
00452       } else if (!strcasecmp(var->name, "pp_agc_level")) {
00453          if (sscanf(var->value, "%30f", &res_f) == 1 && res_f >= 0) {
00454             ast_verb(3, "CODEC SPEEX: Setting preprocessor AGC Level to %f\n",res_f);
00455             pp_agc_level = res_f;
00456          } else
00457             ast_log(LOG_ERROR,"Error! Preprocessor AGC Level must be >= 0\n");
00458       } else if (!strcasecmp(var->name, "pp_denoise")) {
00459          pp_denoise = ast_true(var->value) ? 1 : 0;
00460          ast_verb(3, "CODEC SPEEX: Preprocessor Denoise. [%s]\n",pp_denoise ? "on" : "off");
00461       } else if (!strcasecmp(var->name, "pp_dereverb")) {
00462          pp_dereverb = ast_true(var->value) ? 1 : 0;
00463          ast_verb(3, "CODEC SPEEX: Preprocessor Dereverb. [%s]\n",pp_dereverb ? "on" : "off");
00464       } else if (!strcasecmp(var->name, "pp_dereverb_decay")) {
00465          if (sscanf(var->value, "%30f", &res_f) == 1 && res_f >= 0) {
00466             ast_verb(3, "CODEC SPEEX: Setting preprocessor Dereverb Decay to %f\n",res_f);
00467             pp_dereverb_decay = res_f;
00468          } else
00469             ast_log(LOG_ERROR,"Error! Preprocessor Dereverb Decay must be >= 0\n");
00470       } else if (!strcasecmp(var->name, "pp_dereverb_level")) {
00471          if (sscanf(var->value, "%30f", &res_f) == 1 && res_f >= 0) {
00472             ast_verb(3, "CODEC SPEEX: Setting preprocessor Dereverb Level to %f\n",res_f);
00473             pp_dereverb_level = res_f;
00474          } else
00475             ast_log(LOG_ERROR,"Error! Preprocessor Dereverb Level must be >= 0\n");
00476       }
00477    }
00478    ast_config_destroy(cfg);
00479    return 0;
00480 }
00481 
00482 static int reload(void) 
00483 {
00484    if (parse_config(1))
00485       return AST_MODULE_LOAD_DECLINE;
00486    return AST_MODULE_LOAD_SUCCESS;
00487 }
00488 
00489 static int unload_module(void)
00490 {
00491    int res = 0;
00492 
00493    res |= ast_unregister_translator(&speextolin);
00494    res |= ast_unregister_translator(&lintospeex);
00495    res |= ast_unregister_translator(&speexwbtolin16);
00496    res |= ast_unregister_translator(&lin16tospeexwb);
00497 
00498    return res;
00499 }
00500 
00501 static int load_module(void)
00502 {
00503    int res = 0;
00504 
00505    if (parse_config(0))
00506       return AST_MODULE_LOAD_DECLINE;
00507 
00508    res |= ast_register_translator(&speextolin);
00509    res |= ast_register_translator(&lintospeex);
00510    res |= ast_register_translator(&speexwbtolin16);
00511    res |= ast_register_translator(&lin16tospeexwb);
00512 
00513    return res;
00514 }
00515 
00516 AST_MODULE_INFO(ASTERISK_GPL_KEY, AST_MODFLAG_DEFAULT, "Speex Coder/Decoder",
00517       .load = load_module,
00518       .unload = unload_module,
00519       .reload = reload,
00520           );

Generated on Wed Apr 6 11:29:44 2011 for Asterisk - The Open Source Telephony Project by  doxygen 1.4.7