plc.h

Go to the documentation of this file.
00001 /*
00002  * SpanDSP - a series of DSP components for telephony
00003  *
00004  * plc.h
00005  *
00006  * Written by Steve Underwood <steveu@coppice.org>
00007  *
00008  * Copyright (C) 2004 Steve Underwood
00009  *
00010  * All rights reserved.
00011  *
00012  * This program is free software; you can redistribute it and/or modify
00013  * it under the terms of the GNU Lesser General Public License version 2.1,
00014  * as published by the Free Software Foundation.
00015  *
00016  * This program is distributed in the hope that it will be useful,
00017  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00018  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00019  * GNU Lesser General Public License for more details.
00020  *
00021  * You should have received a copy of the GNU Lesser General Public
00022  * License along with this program; if not, write to the Free Software
00023  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00024  */
00025 
00026 /*! \file */
00027 
00028 #if !defined(_SPANDSP_PLC_H_)
00029 #define _SPANDSP_PLC_H_
00030 
00031 /*! \page plc_page Packet loss concealment
00032 \section plc_page_sec_1 What does it do?
00033 The packet loss concealment module provides a synthetic fill-in signal, to minimise
00034 the audible effect of lost packets in VoIP applications. It is not tied to any
00035 particular codec, and could be used with almost any codec which does not
00036 specify its own procedure for packet loss concealment.
00037 
00038 Where a codec specific concealment procedure exists, that algorithm is usually built
00039 around knowledge of the characteristics of the particular codec. It will, therefore,
00040 generally give better results for that particular codec than this generic concealer will.
00041 
00042 The PLC code implements an algorithm similar to the one described in Appendix I of G.711.
00043 However, the G.711 algorithm is optimised for 10ms packets. Few people use such small
00044 packets. 20ms is a much more common value, and longer packets are also quite common. The
00045 algorithm has been adjusted with this in mind. Also, the G.711 approach causes an
00046 algorithmic delay, and requires significant buffer manipulation when there is no packet
00047 loss. The algorithm used here avoids this. It causes no delay, and achieves comparable
00048 quality with normal speech.
00049 
00050 Note that both this algorithm, and the one in G.711 are optimised for speech. For most kinds
00051 of music a much slower decay on bursts of lost packets gives better results.
00052 
00053 \section plc_page_sec_2 How does it work?
00054 While good packets are being received, the plc_rx() routine keeps a record of the trailing
00055 section of the known speech signal. If a packet is missed, plc_fillin() is called to produce
00056 a synthetic replacement for the real speech signal. The average magnitude difference function
00057 (AMDF) is applied to the last known good signal, to determine its effective pitch.
00058 Based on this, the last pitch period of signal is saved. Essentially, this cycle of speech
00059 will be repeated over and over until the real speech resumes. However, several refinements
00060 are needed to obtain smooth pleasant sounding results.
00061 
00062 - The two ends of the stored cycle of speech will not always fit together smoothly. This can
00063   cause roughness, or even clicks, at the joins between cycles. To soften this, the
00064   1/4 pitch period of real speech preceding the cycle to be repeated is blended with the last
00065   1/4 pitch period of the cycle to be repeated, using an overlap-add (OLA) technique (i.e.
00066   in total, the last 5/4 pitch periods of real speech are used).
00067 
00068 - The start of the synthetic speech will not always fit together smoothly with the tail of
00069   real speech passed on before the erasure was identified. Ideally, we would like to modify
00070   the last 1/4 pitch period of the real speech, to blend it into the synthetic speech. However,
00071   it is too late for that. We could have delayed the real speech a little, but that would
00072   require more buffer manipulation, and hurt the efficiency of the no-lost-packets case
00073   (which we hope is the dominant case). Instead we use a degenerate form of OLA to modify
00074   the start of the synthetic data. The last 1/4 pitch period of real speech is time reversed,
00075   and OLA is used to blend it with the first 1/4 pitch period of synthetic speech. The result
00076   seems quite acceptable.
00077 
00078 - As we progress into the erasure, the chances of the synthetic signal being anything like
00079   correct steadily fall. Therefore, the volume of the synthesized signal is made to decay
00080   linearly, such that after 50ms of missing audio it is reduced to silence.
00081 
00082 - When real speech resumes, an extra 1/4 pitch period of synthetic speech is blended with the
00083   start of the real speech. If the erasure is small, this smoothes the transition. If the erasure
00084   is long, and the synthetic signal has faded to zero, the blending softens the start up of the
00085   real signal, avoiding a kind of "click" or "pop" effect that might occur with a sudden onset.
00086 
00087 \section plc_page_sec_3 How do I use it?
00088 Before audio is processed, call plc_init() to create an instance of the packet loss
00089 concealer. For each received audio packet that is acceptable (i.e. not including those being
00090 dropped for being too late) call plc_rx() to record the content of the packet. Note this may
00091 modify the packet a little after a period of packet loss, to blend real and synthetic data smoothly.
00092 When a real packet is not available in time, call plc_fillin() to create a synthetic substitute.
00093 That's it!
00094 */
00095 
00096 /*! Minimum allowed pitch (66 Hz). The value is the pitch period in samples, so the lowest pitch has the largest value. */
00097 #define PLC_PITCH_MIN           120
00098 /*! Maximum allowed pitch (200 Hz). The value is the pitch period in samples, so the highest pitch has the smallest value. */
00099 #define PLC_PITCH_MAX           40
00100 /*! Maximum pitch OLA window (1/4 of the longest allowed pitch period) */
00101 #define PLC_PITCH_OVERLAP_MAX   (PLC_PITCH_MIN >> 2)
00102 /*! The length, in samples, over which the AMDF function looks for similarity (20 ms) */
00103 #define CORRELATION_SPAN        160
00104 /*! History buffer length. The buffer must also be at least 1.25 times
00105     PLC_PITCH_MIN, but that is much smaller than the buffer needs to be for
00106     the pitch assessment. */
00107 #define PLC_HISTORY_LEN         (CORRELATION_SPAN + PLC_PITCH_MIN)
00108 
00109 /*!
00110     The generic packet loss concealer context.
00111 */
00112 typedef struct
00113 {
00114     /*! Count of consecutive erased samples */
00115     int missing_samples;
00116     /*! Current offset into the pitch period */
00117     int pitch_offset;
00118     /*! Pitch estimate (presumably a period in samples, like PLC_PITCH_MIN and PLC_PITCH_MAX - TODO confirm) */
00119     int pitch;
00120     /*! Buffer for a cycle of speech, sized for the longest allowed pitch period (PLC_PITCH_MIN samples) */
00121     float pitchbuf[PLC_PITCH_MIN];
00122     /*! History buffer, holding PLC_HISTORY_LEN 16 bit samples */
00123     int16_t history[PLC_HISTORY_LEN];
00124     /*! Current position in the history buffer, held as an integer index */
00125     int buf_ptr;
00126 } plc_state_t;
00127 
00128 
00129 #if defined(__cplusplus)
00130 extern "C"
00131 {
00132 #endif
00133 
00134 /*! Process a block of received audio samples for PLC.
00135     \brief Process a block of received audio samples for PLC.
00136     \param s The packet loss concealer context.
00137     \param amp The audio sample buffer. This may be modified a little after a period of packet loss, to blend real and synthetic data smoothly.
00138     \param len The number of samples in the buffer.
00139     \return The number of samples in the buffer. */
00140 SPAN_DECLARE(int) plc_rx(plc_state_t *s, int16_t amp[], int len);
00141 
00142 /*! Fill-in a block of missing audio samples.
00143     \brief Fill-in a block of missing audio samples.
00144     \param s The packet loss concealer context.
00145     \param amp The audio sample buffer, which receives the synthesised samples.
00146     \param len The number of samples to be synthesised.
00147     \return The number of samples synthesised. */
00148 SPAN_DECLARE(int) plc_fillin(plc_state_t *s, int16_t amp[], int len);
00149 
00150 /*! Initialise a packet loss concealer context.
00151     \brief Initialise a packet loss concealer context.
00152     \param s The packet loss concealer context.
00153     \return A pointer to the packet loss concealer context. */
00154 SPAN_DECLARE(plc_state_t *) plc_init(plc_state_t *s);
00155 
00156 /*! \brief Release a packet loss concealer context.
00157     \param s The packet loss concealer context.
00158     \return 0 for OK. */
00159 SPAN_DECLARE(int) plc_release(plc_state_t *s);
00160 
00161 /*! \brief Free a packet loss concealer context.
00162     \param s The packet loss concealer context.
00163     \return 0 for OK. */
00164 SPAN_DECLARE(int) plc_free(plc_state_t *s);
00165 
00166 #if defined(__cplusplus)
00167 }
00168 #endif
00169 
00170 #endif
00171 /*- End of file ------------------------------------------------------------*/

Generated on 9 Jul 2012 for spandsp by  doxygen 1.6.1