/*! \file
 * \brief SpanDSP - a series of DSP components for telephony
 *
 * plc.h
 *
 * \author Steve Underwood <steveu@coppice.org>
 *
 * Copyright (C) 2004 Steve Underwood
 *
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * This version may be optionally licenced under the GNU LGPL licence.
 *
 * A license has been granted to Digium (via disclaimer) for the use of
 * this code.
 */


#if !defined(_SPANDSP_PLC_H_)
#define _SPANDSP_PLC_H_

/* Obtain the fixed-width integer types (int16_t) from whichever header the
   platform provides them in. */
#ifdef SOLARIS
#include <sys/int_types.h>
#else
#if defined(__OpenBSD__) || defined( __FreeBSD__)
#include <inttypes.h>
#else
#include <stdint.h>
#endif
#endif

/*! \page plc_page Packet loss concealment
\section plc_page_sec_1 What does it do?
The packet loss concealment module provides a suitable synthetic fill-in signal,
to minimise the audible effect of lost packets in VoIP applications. It is not
tied to any particular codec, and could be used with almost any codec which does not
specify its own procedure for packet loss concealment.

Where a codec specific concealment procedure exists, the algorithm is usually built
around knowledge of the characteristics of the particular codec. It will, therefore,
generally give better results for that particular codec than this generic concealer will.

\section plc_page_sec_2 How does it work?
While good packets are being received, the plc_rx() routine keeps a record of the trailing
section of the known speech signal. If a packet is missed, plc_fillin() is called to produce
a synthetic replacement for the real speech signal. The average magnitude difference function
(AMDF) is applied to the last known good signal, to determine its effective pitch.
Based on this, the last pitch period of signal is saved. Essentially, this cycle of speech
will be repeated over and over until the real speech resumes. However, several refinements
are needed to obtain smooth pleasant sounding results.

- The two ends of the stored cycle of speech will not always fit together smoothly. This can
  cause roughness, or even clicks, at the joins between cycles. To soften this, the
  1/4 pitch period of real speech preceding the cycle to be repeated is blended with the last
  1/4 pitch period of the cycle to be repeated, using an overlap-add (OLA) technique (i.e.
  in total, the last 5/4 pitch periods of real speech are used).

- The start of the synthetic speech will not always fit together smoothly with the tail of
  real speech passed on before the erasure was identified. Ideally, we would like to modify
  the last 1/4 pitch period of the real speech, to blend it into the synthetic speech. However,
  it is too late for that. We could have delayed the real speech a little, but that would
  require more buffer manipulation, and hurt the efficiency of the no-lost-packets case
  (which we hope is the dominant case). Instead we use a degenerate form of OLA to modify
  the start of the synthetic data. The last 1/4 pitch period of real speech is time reversed,
  and OLA is used to blend it with the first 1/4 pitch period of synthetic speech. The result
  seems quite acceptable.

- As we progress into the erasure, the chances of the synthetic signal being anything like
  correct steadily fall. Therefore, the volume of the synthesized signal is made to decay
  linearly, such that after 50ms of missing audio it is reduced to silence.

- When real speech resumes, an extra 1/4 pitch period of synthetic speech is blended with the
  start of the real speech. If the erasure is small, this smooths the transition. If the erasure
  is long, and the synthetic signal has faded to zero, the blending softens the start up of the
  real signal, avoiding a kind of "click" or "pop" effect that might occur with a sudden onset.

\section plc_page_sec_3 How do I use it?
Before audio is processed, call plc_init() to create an instance of the packet loss
concealer. For each received audio packet that is acceptable (i.e. not including those being
dropped for being too late) call plc_rx() to record the content of the packet. Note this may
modify the packet a little after a period of packet loss, to blend real and synthetic data smoothly.
When a real packet is not available in time, call plc_fillin() to create a synthetic substitute.
That's it!
*/

/*! Minimum allowed pitch (66 Hz). The value is the corresponding pitch period
    in samples, which is why it is numerically larger than PLC_PITCH_MAX.
    (The Hz figures are consistent with 8000 samples/second, the rate implied
    by CORRELATION_SPAN being 160 samples for 20 ms.) */
#define PLC_PITCH_MIN           120
/*! Maximum allowed pitch (200 Hz), expressed as a pitch period in samples */
#define PLC_PITCH_MAX           40
/*! Maximum pitch OLA window: one quarter of the longest pitch period */
#define PLC_PITCH_OVERLAP_MAX   (PLC_PITCH_MIN >> 2)
/*! The length over which the AMDF function looks for similarity (20 ms) */
#define CORRELATION_SPAN        160
/*! History buffer length. The buffer must also be at least 1.25 times
    PLC_PITCH_MIN, but that is much smaller than the buffer needs to be for
    the pitch assessment. */
#define PLC_HISTORY_LEN         (CORRELATION_SPAN + PLC_PITCH_MIN)

/*! The packet loss concealer context. One instance is passed to every
    plc_xxx() call declared below. */
typedef struct
{
    /*! Consecutive erased samples */
    int missing_samples;
    /*! Current offset into pitch period */
    int pitch_offset;
    /*! Pitch estimate */
    int pitch;
    /*! Buffer for a cycle of speech; sized for the longest allowed pitch period */
    float pitchbuf[PLC_PITCH_MIN];
    /*! History buffer */
    int16_t history[PLC_HISTORY_LEN];
    /*! Current pointer into the history buffer */
    int buf_ptr;
} plc_state_t;


#ifdef __cplusplus
extern "C" {
#endif

/*! Process a block of received audio samples.
    \brief Process a block of received audio samples.

    Call this for each acceptable received packet, so the concealer can record
    its content. After a period of packet loss the samples in amp may be
    modified a little, to blend the real and synthetic data smoothly.
    \param s The packet loss concealer context.
    \param amp The audio sample buffer.
    \param len The number of samples in the buffer.
    \return The number of samples in the buffer. */
int plc_rx(plc_state_t *s, int16_t amp[], int len);

/*! Fill-in a block of missing audio samples.
    \brief Fill-in a block of missing audio samples.

    Call this when a real packet is not available in time, to create a
    synthetic substitute in amp.
    \param s The packet loss concealer context.
    \param amp The audio sample buffer.
    \param len The number of samples to be synthesised.
    \return The number of samples synthesized. */
int plc_fillin(plc_state_t *s, int16_t amp[], int len);

/*! Initialise a packet loss concealer context.
    \brief Initialise a packet loss concealer context.

    Call this before any audio is processed.
    \param s The packet loss concealer context.
    \return A pointer to the packet loss concealer context. */
plc_state_t *plc_init(plc_state_t *s);

#ifdef __cplusplus
}
#endif

#endif
/*- End of file ------------------------------------------------------------*/