Wed Aug 7 17:15:40 2019

Asterisk developer's documentation


codec_speex.c

Go to the documentation of this file.
00001 /*
00002  * Asterisk -- An open source telephony toolkit.
00003  *
00004  * Copyright (C) 1999 - 2005, Digium, Inc.
00005  *
00006  * Mark Spencer <markster@digium.com>
00007  *
00008  *
00009  * See http://www.asterisk.org for more information about
00010  * the Asterisk project. Please do not directly contact
00011  * any of the maintainers of this project for assistance;
00012  * the project provides a web site, mailing lists and IRC
00013  * channels for your use.
00014  *
00015  * This program is free software, distributed under the terms of
00016  * the GNU General Public License Version 2. See the LICENSE file
00017  * at the top of the source tree.
00018  */
00019 
00020 /*! \file
00021  *
00022  * \brief Translate between signed linear and Speex (Open Codec)
00023  *
00024  * \note This work was motivated by Jeremy McNamara 
00025  * hacked to be configurable by anthm and bkw 9/28/2004
00026  *
00027  * \ingroup codecs
00028  *
00029  * \extref The Speex library - http://www.speex.org
00030  *
00031  */
00032 
00033 /*** MODULEINFO
00034    <depend>speex</depend>
00035    <depend>speex_preprocess</depend>
00036    <use>speexdsp</use>
00037    <support_level>core</support_level>
00038  ***/
00039 
00040 #include "asterisk.h"
00041 
00042 ASTERISK_FILE_VERSION(__FILE__, "$Revision: 328209 $")
00043 
00044 #include <speex/speex.h>
00045 
00046 /* We require a post 1.1.8 version of Speex to enable preprocessing
00047    and better type handling */   
00048 #ifdef _SPEEX_TYPES_H
00049 #include <speex/speex_preprocess.h>
00050 #endif
00051 
00052 #include "asterisk/translate.h"
00053 #include "asterisk/module.h"
00054 #include "asterisk/config.h"
00055 #include "asterisk/utils.h"
00056 
/*
 * Encoder / decoder settings.  These are the built-in defaults;
 * parse_config() replaces them with values from the [speex] section
 * of codecs.conf.
 */
static int quality = 3;        /* SPEEX_SET_QUALITY, applied only when abr and vbr are both off */
static int complexity = 2;     /* SPEEX_SET_COMPLEXITY */
static int enhancement = 0;    /* nonzero: decoders get SPEEX_SET_ENH */
static int vad = 0;            /* nonzero: SPEEX_SET_VAD, applied only when abr and vbr are both off */
static int vbr = 0;            /* nonzero: SPEEX_SET_VBR together with vbr_quality */
static float vbr_quality = 4;  /* SPEEX_SET_VBR_QUALITY */
static int abr = 0;            /* SPEEX_SET_ABR target bitrate, 0 leaves ABR off */
static int dtx = 0;            /* set to 1 to enable silence detection (SPEEX_SET_DTX) */

/*
 * Preprocessor settings; only consulted when the code is built against
 * a Speex that defines _SPEEX_TYPES_H and preproc is nonzero.
 */
static int preproc = 0;
static int pp_vad = 0;
static int pp_agc = 0;
/* NOTE(review): handed to SPEEX_PREPROCESS_SET_AGC_LEVEL as-is; presumably
   the AGC target level in Speex's own units - confirm against the Speex
   preprocessor documentation */
static float pp_agc_level = 8000;
static int pp_denoise = 0;
static int pp_dereverb = 0;
static float pp_dereverb_decay = 0.4;
static float pp_dereverb_level = 0.3;
00075 
/* Frame type tags (not referenced by the code in this listing) */
#define TYPE_SILENCE    0x2
#define TYPE_HIGH       0x0
#define TYPE_LOW        0x1
#define TYPE_MASK       0x3

/* Capacity, in samples, of the translators' work and output buffers */
#define BUFFER_SAMPLES  8000
/* NOTE(review): presumably the narrowband frame length; the code below
   queries the real frame size with SPEEX_GET_FRAME_SIZE instead */
#define SPEEX_SAMPLES   160
00083 
00084 /* Sample frame data */
00085 #include "asterisk/slin.h"
00086 #include "ex_speex.h"
00087 
00088 struct speex_coder_pvt {
00089    void *speex;
00090    SpeexBits bits;
00091    int framesize;
00092    int silent_state;
00093 #ifdef _SPEEX_TYPES_H
00094    SpeexPreprocessState *pp;
00095    spx_int16_t buf[BUFFER_SAMPLES];
00096 #else
00097    int16_t buf[BUFFER_SAMPLES];  /* input, waiting to be compressed */
00098 #endif
00099 };
00100 
00101 static int speex_encoder_construct(struct ast_trans_pvt *pvt, const SpeexMode *profile, int sampling_rate)
00102 {
00103    struct speex_coder_pvt *tmp = pvt->pvt;
00104 
00105    if (!(tmp->speex = speex_encoder_init(profile)))
00106       return -1;
00107 
00108    speex_bits_init(&tmp->bits);
00109    speex_bits_reset(&tmp->bits);
00110    speex_encoder_ctl(tmp->speex, SPEEX_GET_FRAME_SIZE, &tmp->framesize);
00111    speex_encoder_ctl(tmp->speex, SPEEX_SET_COMPLEXITY, &complexity);
00112 #ifdef _SPEEX_TYPES_H
00113    if (preproc) {
00114       tmp->pp = speex_preprocess_state_init(tmp->framesize, sampling_rate);
00115       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_VAD, &pp_vad);
00116       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_AGC, &pp_agc);
00117       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_AGC_LEVEL, &pp_agc_level);
00118       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DENOISE, &pp_denoise);
00119       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB, &pp_dereverb);
00120       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB_DECAY, &pp_dereverb_decay);
00121       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB_LEVEL, &pp_dereverb_level);
00122    }
00123 #endif
00124    if (!abr && !vbr) {
00125       speex_encoder_ctl(tmp->speex, SPEEX_SET_QUALITY, &quality);
00126       if (vad)
00127          speex_encoder_ctl(tmp->speex, SPEEX_SET_VAD, &vad);
00128    }
00129    if (vbr) {
00130       speex_encoder_ctl(tmp->speex, SPEEX_SET_VBR, &vbr);
00131       speex_encoder_ctl(tmp->speex, SPEEX_SET_VBR_QUALITY, &vbr_quality);
00132    }
00133    if (abr)
00134       speex_encoder_ctl(tmp->speex, SPEEX_SET_ABR, &abr);
00135    if (dtx)
00136       speex_encoder_ctl(tmp->speex, SPEEX_SET_DTX, &dtx); 
00137    tmp->silent_state = 0;
00138 
00139    return 0;
00140 }
00141 
00142 static int lintospeex_new(struct ast_trans_pvt *pvt)
00143 {
00144    return speex_encoder_construct(pvt, &speex_nb_mode, 8000);
00145 }
00146 
00147 static int lin16tospeexwb_new(struct ast_trans_pvt *pvt)
00148 {
00149    return speex_encoder_construct(pvt, &speex_wb_mode, 16000);
00150 }
00151 
00152 static int speex_decoder_construct(struct ast_trans_pvt *pvt, const SpeexMode *profile)
00153 {
00154    struct speex_coder_pvt *tmp = pvt->pvt;
00155    
00156    if (!(tmp->speex = speex_decoder_init(profile)))
00157       return -1;
00158 
00159    speex_bits_init(&tmp->bits);
00160    speex_decoder_ctl(tmp->speex, SPEEX_GET_FRAME_SIZE, &tmp->framesize);
00161    if (enhancement)
00162       speex_decoder_ctl(tmp->speex, SPEEX_SET_ENH, &enhancement);
00163 
00164    return 0;
00165 }
00166 
00167 static int speextolin_new(struct ast_trans_pvt *pvt)
00168 {
00169    return speex_decoder_construct(pvt, &speex_nb_mode);
00170 }
00171 
00172 static int speexwbtolin16_new(struct ast_trans_pvt *pvt)
00173 {
00174    return speex_decoder_construct(pvt, &speex_wb_mode);
00175 }
00176 
00177 /*! \brief convert and store into outbuf */
00178 static int speextolin_framein(struct ast_trans_pvt *pvt, struct ast_frame *f)
00179 {
00180    struct speex_coder_pvt *tmp = pvt->pvt;
00181 
00182    /* Assuming there's space left, decode into the current buffer at
00183       the tail location.  Read in as many frames as there are */
00184    int x;
00185    int res;
00186    int16_t *dst = pvt->outbuf.i16;
00187    /* XXX fout is a temporary buffer, may have different types */
00188 #ifdef _SPEEX_TYPES_H
00189    spx_int16_t fout[1024];
00190 #else
00191    float fout[1024];
00192 #endif
00193 
00194    if (f->datalen == 0) {  /* Native PLC interpolation */
00195       if (pvt->samples + tmp->framesize > BUFFER_SAMPLES) {
00196          ast_log(LOG_WARNING, "Out of buffer space\n");
00197          return -1;
00198       }
00199 #ifdef _SPEEX_TYPES_H
00200       speex_decode_int(tmp->speex, NULL, dst + pvt->samples);
00201 #else
00202       speex_decode(tmp->speex, NULL, fout);
00203       for (x=0;x<tmp->framesize;x++) {
00204          dst[pvt->samples + x] = (int16_t)fout[x];
00205       }
00206 #endif
00207       pvt->samples += tmp->framesize;
00208       pvt->datalen += 2 * tmp->framesize; /* 2 bytes/sample */
00209       return 0;
00210    }
00211 
00212    /* Read in bits */
00213    speex_bits_read_from(&tmp->bits, f->data.ptr, f->datalen);
00214    for (;;) {
00215 #ifdef _SPEEX_TYPES_H
00216       res = speex_decode_int(tmp->speex, &tmp->bits, fout);
00217 #else
00218       res = speex_decode(tmp->speex, &tmp->bits, fout);
00219 #endif
00220       if (res < 0)
00221          break;
00222       if (pvt->samples + tmp->framesize > BUFFER_SAMPLES) {
00223          ast_log(LOG_WARNING, "Out of buffer space\n");
00224          return -1;
00225       }
00226       for (x = 0 ; x < tmp->framesize; x++)
00227          dst[pvt->samples + x] = (int16_t)fout[x];
00228       pvt->samples += tmp->framesize;
00229       pvt->datalen += 2 * tmp->framesize; /* 2 bytes/sample */
00230    }
00231    return 0;
00232 }
00233 
00234 /*! \brief store input frame in work buffer */
00235 static int lintospeex_framein(struct ast_trans_pvt *pvt, struct ast_frame *f)
00236 {
00237    struct speex_coder_pvt *tmp = pvt->pvt;
00238 
00239    /* XXX We should look at how old the rest of our stream is, and if it
00240       is too old, then we should overwrite it entirely, otherwise we can
00241       get artifacts of earlier talk that do not belong */
00242    memcpy(tmp->buf + pvt->samples, f->data.ptr, f->datalen);
00243    pvt->samples += f->samples;
00244    return 0;
00245 }
00246 
00247 /*! \brief convert work buffer and produce output frame */
00248 static struct ast_frame *lintospeex_frameout(struct ast_trans_pvt *pvt)
00249 {
00250    struct speex_coder_pvt *tmp = pvt->pvt;
00251    int is_speech=1;
00252    int datalen = 0;  /* output bytes */
00253    int samples = 0;  /* output samples */
00254 
00255    /* We can't work on anything less than a frame in size */
00256    if (pvt->samples < tmp->framesize)
00257       return NULL;
00258    speex_bits_reset(&tmp->bits);
00259    while (pvt->samples >= tmp->framesize) {
00260 #ifdef _SPEEX_TYPES_H
00261       /* Preprocess audio */
00262       if (preproc)
00263          is_speech = speex_preprocess(tmp->pp, tmp->buf + samples, NULL);
00264       /* Encode a frame of data */
00265       if (is_speech) {
00266          /* If DTX enabled speex_encode returns 0 during silence */
00267          is_speech = speex_encode_int(tmp->speex, tmp->buf + samples, &tmp->bits) || !dtx;
00268       } else {
00269          /* 5 zeros interpreted by Speex as silence (submode 0) */
00270          speex_bits_pack(&tmp->bits, 0, 5);
00271       }
00272 #else
00273       {
00274          float fbuf[1024];
00275          int x;
00276          /* Convert to floating point */
00277          for (x = 0; x < tmp->framesize; x++)
00278             fbuf[x] = tmp->buf[samples + x];
00279          /* Encode a frame of data */
00280          is_speech = speex_encode(tmp->speex, fbuf, &tmp->bits) || !dtx;
00281       }
00282 #endif
00283       samples += tmp->framesize;
00284       pvt->samples -= tmp->framesize;
00285    }
00286 
00287    /* Move the data at the end of the buffer to the front */
00288    if (pvt->samples)
00289       memmove(tmp->buf, tmp->buf + samples, pvt->samples * 2);
00290 
00291    /* Use AST_FRAME_CNG to signify the start of any silence period */
00292    if (is_speech) {
00293       tmp->silent_state = 0;
00294    } else {
00295       if (tmp->silent_state) {
00296          return NULL;
00297       } else {
00298          tmp->silent_state = 1;
00299          speex_bits_reset(&tmp->bits);
00300          memset(&pvt->f, 0, sizeof(pvt->f));
00301          pvt->f.frametype = AST_FRAME_CNG;
00302          pvt->f.samples = samples;
00303          /* XXX what now ? format etc... */
00304       }
00305    }
00306 
00307    /* Terminate bit stream */
00308    speex_bits_pack(&tmp->bits, 15, 5);
00309    datalen = speex_bits_write(&tmp->bits, pvt->outbuf.c, pvt->t->buf_size);
00310    return ast_trans_frameout(pvt, datalen, samples);
00311 }
00312 
00313 static void speextolin_destroy(struct ast_trans_pvt *arg)
00314 {
00315    struct speex_coder_pvt *pvt = arg->pvt;
00316 
00317    speex_decoder_destroy(pvt->speex);
00318    speex_bits_destroy(&pvt->bits);
00319 }
00320 
00321 static void lintospeex_destroy(struct ast_trans_pvt *arg)
00322 {
00323    struct speex_coder_pvt *pvt = arg->pvt;
00324 #ifdef _SPEEX_TYPES_H
00325    if (preproc)
00326       speex_preprocess_state_destroy(pvt->pp);
00327 #endif
00328    speex_encoder_destroy(pvt->speex);
00329    speex_bits_destroy(&pvt->bits);
00330 }
00331 
00332 static struct ast_translator speextolin = {
00333    .name = "speextolin", 
00334    .srcfmt = AST_FORMAT_SPEEX,
00335    .dstfmt =  AST_FORMAT_SLINEAR,
00336    .newpvt = speextolin_new,
00337    .framein = speextolin_framein,
00338    .destroy = speextolin_destroy,
00339    .sample = speex_sample,
00340    .desc_size = sizeof(struct speex_coder_pvt),
00341    .buffer_samples = BUFFER_SAMPLES,
00342    .buf_size = BUFFER_SAMPLES * 2,
00343    .native_plc = 1,
00344 };
00345 
00346 static struct ast_translator lintospeex = {
00347    .name = "lintospeex", 
00348    .srcfmt = AST_FORMAT_SLINEAR,
00349    .dstfmt = AST_FORMAT_SPEEX,
00350    .newpvt = lintospeex_new,
00351    .framein = lintospeex_framein,
00352    .frameout = lintospeex_frameout,
00353    .destroy = lintospeex_destroy,
00354    .sample = slin8_sample,
00355    .desc_size = sizeof(struct speex_coder_pvt),
00356    .buffer_samples = BUFFER_SAMPLES,
00357    .buf_size = BUFFER_SAMPLES * 2, /* XXX maybe a lot less ? */
00358 };
00359 
00360 static struct ast_translator speexwbtolin16 = {
00361    .name = "speexwbtolin16", 
00362    .srcfmt = AST_FORMAT_SPEEX16,
00363    .dstfmt =  AST_FORMAT_SLINEAR16,
00364    .newpvt = speexwbtolin16_new,
00365    .framein = speextolin_framein,
00366    .destroy = speextolin_destroy,
00367    .sample = speex16_sample,
00368    .desc_size = sizeof(struct speex_coder_pvt),
00369    .buffer_samples = BUFFER_SAMPLES,
00370    .buf_size = BUFFER_SAMPLES * 2,
00371    .native_plc = 1,
00372 };
00373 
00374 static struct ast_translator lin16tospeexwb = {
00375    .name = "lin16tospeexwb", 
00376    .srcfmt = AST_FORMAT_SLINEAR16,
00377    .dstfmt = AST_FORMAT_SPEEX16,
00378    .newpvt = lin16tospeexwb_new,
00379    .framein = lintospeex_framein,
00380    .frameout = lintospeex_frameout,
00381    .destroy = lintospeex_destroy,
00382    .sample = slin16_sample,
00383    .desc_size = sizeof(struct speex_coder_pvt),
00384    .buffer_samples = BUFFER_SAMPLES,
00385    .buf_size = BUFFER_SAMPLES * 2, /* XXX maybe a lot less ? */
00386 };
00387 
00388 static int parse_config(int reload) 
00389 {
00390    struct ast_flags config_flags = { reload ? CONFIG_FLAG_FILEUNCHANGED : 0 };
00391    struct ast_config *cfg = ast_config_load("codecs.conf", config_flags);
00392    struct ast_variable *var;
00393    int res;
00394    float res_f;
00395 
00396    if (cfg == CONFIG_STATUS_FILEMISSING || cfg == CONFIG_STATUS_FILEUNCHANGED || cfg == CONFIG_STATUS_FILEINVALID)
00397       return 0;
00398 
00399    for (var = ast_variable_browse(cfg, "speex"); var; var = var->next) {
00400       if (!strcasecmp(var->name, "quality")) {
00401          res = abs(atoi(var->value));
00402          if (res > -1 && res < 11) {
00403             ast_verb(3, "CODEC SPEEX: Setting Quality to %d\n",res);
00404             quality = res;
00405          } else 
00406             ast_log(LOG_ERROR,"Error Quality must be 0-10\n");
00407       } else if (!strcasecmp(var->name, "complexity")) {
00408          res = abs(atoi(var->value));
00409          if (res > -1 && res < 11) {
00410             ast_verb(3, "CODEC SPEEX: Setting Complexity to %d\n",res);
00411             complexity = res;
00412          } else 
00413             ast_log(LOG_ERROR,"Error! Complexity must be 0-10\n");
00414       } else if (!strcasecmp(var->name, "vbr_quality")) {
00415          if (sscanf(var->value, "%30f", &res_f) == 1 && res_f >= 0 && res_f <= 10) {
00416             ast_verb(3, "CODEC SPEEX: Setting VBR Quality to %f\n",res_f);
00417             vbr_quality = res_f;
00418          } else
00419             ast_log(LOG_ERROR,"Error! VBR Quality must be 0-10\n");
00420       } else if (!strcasecmp(var->name, "abr_quality")) {
00421          ast_log(LOG_ERROR,"Error! ABR Quality setting obsolete, set ABR to desired bitrate\n");
00422       } else if (!strcasecmp(var->name, "enhancement")) {
00423          enhancement = ast_true(var->value) ? 1 : 0;
00424          ast_verb(3, "CODEC SPEEX: Perceptual Enhancement Mode. [%s]\n",enhancement ? "on" : "off");
00425       } else if (!strcasecmp(var->name, "vbr")) {
00426          vbr = ast_true(var->value) ? 1 : 0;
00427          ast_verb(3, "CODEC SPEEX: VBR Mode. [%s]\n",vbr ? "on" : "off");
00428       } else if (!strcasecmp(var->name, "abr")) {
00429          res = abs(atoi(var->value));
00430          if (res >= 0) {
00431                if (res > 0)
00432                ast_verb(3, "CODEC SPEEX: Setting ABR target bitrate to %d\n",res);
00433                else
00434                ast_verb(3, "CODEC SPEEX: Disabling ABR\n");
00435             abr = res;
00436          } else 
00437             ast_log(LOG_ERROR,"Error! ABR target bitrate must be >= 0\n");
00438       } else if (!strcasecmp(var->name, "vad")) {
00439          vad = ast_true(var->value) ? 1 : 0;
00440          ast_verb(3, "CODEC SPEEX: VAD Mode. [%s]\n",vad ? "on" : "off");
00441       } else if (!strcasecmp(var->name, "dtx")) {
00442          dtx = ast_true(var->value) ? 1 : 0;
00443          ast_verb(3, "CODEC SPEEX: DTX Mode. [%s]\n",dtx ? "on" : "off");
00444       } else if (!strcasecmp(var->name, "preprocess")) {
00445          preproc = ast_true(var->value) ? 1 : 0;
00446          ast_verb(3, "CODEC SPEEX: Preprocessing. [%s]\n",preproc ? "on" : "off");
00447       } else if (!strcasecmp(var->name, "pp_vad")) {
00448          pp_vad = ast_true(var->value) ? 1 : 0;
00449          ast_verb(3, "CODEC SPEEX: Preprocessor VAD. [%s]\n",pp_vad ? "on" : "off");
00450       } else if (!strcasecmp(var->name, "pp_agc")) {
00451          pp_agc = ast_true(var->value) ? 1 : 0;
00452          ast_verb(3, "CODEC SPEEX: Preprocessor AGC. [%s]\n",pp_agc ? "on" : "off");
00453       } else if (!strcasecmp(var->name, "pp_agc_level")) {
00454          if (sscanf(var->value, "%30f", &res_f) == 1 && res_f >= 0) {
00455             ast_verb(3, "CODEC SPEEX: Setting preprocessor AGC Level to %f\n",res_f);
00456             pp_agc_level = res_f;
00457          } else
00458             ast_log(LOG_ERROR,"Error! Preprocessor AGC Level must be >= 0\n");
00459       } else if (!strcasecmp(var->name, "pp_denoise")) {
00460          pp_denoise = ast_true(var->value) ? 1 : 0;
00461          ast_verb(3, "CODEC SPEEX: Preprocessor Denoise. [%s]\n",pp_denoise ? "on" : "off");
00462       } else if (!strcasecmp(var->name, "pp_dereverb")) {
00463          pp_dereverb = ast_true(var->value) ? 1 : 0;
00464          ast_verb(3, "CODEC SPEEX: Preprocessor Dereverb. [%s]\n",pp_dereverb ? "on" : "off");
00465       } else if (!strcasecmp(var->name, "pp_dereverb_decay")) {
00466          if (sscanf(var->value, "%30f", &res_f) == 1 && res_f >= 0) {
00467             ast_verb(3, "CODEC SPEEX: Setting preprocessor Dereverb Decay to %f\n",res_f);
00468             pp_dereverb_decay = res_f;
00469          } else
00470             ast_log(LOG_ERROR,"Error! Preprocessor Dereverb Decay must be >= 0\n");
00471       } else if (!strcasecmp(var->name, "pp_dereverb_level")) {
00472          if (sscanf(var->value, "%30f", &res_f) == 1 && res_f >= 0) {
00473             ast_verb(3, "CODEC SPEEX: Setting preprocessor Dereverb Level to %f\n",res_f);
00474             pp_dereverb_level = res_f;
00475          } else
00476             ast_log(LOG_ERROR,"Error! Preprocessor Dereverb Level must be >= 0\n");
00477       }
00478    }
00479    ast_config_destroy(cfg);
00480    return 0;
00481 }
00482 
00483 static int reload(void) 
00484 {
00485    if (parse_config(1))
00486       return AST_MODULE_LOAD_DECLINE;
00487    return AST_MODULE_LOAD_SUCCESS;
00488 }
00489 
00490 static int unload_module(void)
00491 {
00492    int res = 0;
00493 
00494    res |= ast_unregister_translator(&speextolin);
00495    res |= ast_unregister_translator(&lintospeex);
00496    res |= ast_unregister_translator(&speexwbtolin16);
00497    res |= ast_unregister_translator(&lin16tospeexwb);
00498 
00499    return res;
00500 }
00501 
00502 static int load_module(void)
00503 {
00504    int res = 0;
00505 
00506    if (parse_config(0))
00507       return AST_MODULE_LOAD_DECLINE;
00508 
00509    res |= ast_register_translator(&speextolin);
00510    res |= ast_register_translator(&lintospeex);
00511    res |= ast_register_translator(&speexwbtolin16);
00512    res |= ast_register_translator(&lin16tospeexwb);
00513 
00514    return res;
00515 }
00516 
00517 AST_MODULE_INFO(ASTERISK_GPL_KEY, AST_MODFLAG_DEFAULT, "Speex Coder/Decoder",
00518       .load = load_module,
00519       .unload = unload_module,
00520       .reload = reload,
00521           );

Generated on 7 Aug 2019 for Asterisk - The Open Source Telephony Project by  doxygen 1.6.1