audio_encode.c

00001 /*
00002  * iaxclient: a cross-platform IAX softphone library
00003  *
00004  * Copyrights:
00005  * Copyright (C) 2003-2006, Horizon Wimba, Inc.
00006  * Copyright (C) 2007, Wimba, Inc.
00007  *
00008  * Contributors:
00009  * Steve Kann <stevek@stevek.com>
00010  * Michael Van Donselaar <mvand@vandonselaar.org>
00011  * Shawn Lawrence <shawn.lawrence@terracecomm.com>
00012  *
00013  * This program is free software, distributed under the terms of
00014  * the GNU Lesser (Library) General Public License.
00015  */
00016 
00017 #include "iaxclient_lib.h"
00018 #include "iax-client.h"
00019 #ifdef CODEC_GSM
00020 #include "codec_gsm.h"
00021 #endif
00022 #include "codec_ulaw.h"
00023 #include "codec_alaw.h"
00024 #include "codec_speex.h"
00025 #include <speex/speex_preprocess.h>
00026 
00027 #ifdef CODEC_ILBC
00028         #include "codec_ilbc.h"
00029 #endif
00030 
00031 float iaxci_silence_threshold = -99.0f;
00032 
00033 static float input_level = 0.0f;
00034 static float output_level = 0.0f;
00035 
00036 static SpeexPreprocessState *st = NULL;
00037 static int speex_state_size = 0;
00038 static int speex_state_rate = 0;
00039 int iaxci_filters = IAXC_FILTER_AGC|IAXC_FILTER_DENOISE|IAXC_FILTER_AAGC|IAXC_FILTER_CN;
00040 
00041 /* use to measure time since last audio was processed */
00042 static struct timeval timeLastInput ;
00043 static struct timeval timeLastOutput ;
00044 
00045 static struct iaxc_speex_settings speex_settings =
00046 {
00047         1,    /* decode_enhance */
00048         -1,   /* float quality */
00049         -1,   /* bitrate */
00050         0,    /* vbr */
00051         0,    /* abr */
00052         3     /* complexity */
00053 };
00054 
00055 
00056 static float vol_to_db(float vol)
00057 {
00058         /* avoid calling log10() on zero which yields inf or
00059          * negative numbers which yield nan */
00060         if ( vol <= 0.0 )
00061                 return -99.9;
00062         else
00063                 return log10f(vol) * 20;
00064 }
00065 
00066 static int do_level_callback()
00067 {
00068         static struct timeval last = {0,0};
00069         struct timeval now;
00070         float input_db;
00071         float output_db;
00072 
00073         gettimeofday(&now, 0);
00074 
00075         if ( last.tv_sec != 0 && iaxci_usecdiff(&now, &last) < 100000 )
00076                 return 0;
00077 
00078         last = now;
00079 
00080         /* if input has not been processed in the last second, set to silent */
00081         input_db = iaxci_usecdiff(&now, &timeLastInput) < 1000000 ?
00082                         vol_to_db(input_level) : -99.9f;
00083 
00084         /* if output has not been processed in the last second, set to silent */
00085         output_db = iaxci_usecdiff(&now, &timeLastOutput) < 1000000 ?
00086                 vol_to_db(output_level) : -99.9f;
00087 
00088         iaxci_do_levels_callback(input_db, output_db);
00089 
00090         return 0;
00091 }
00092 
00093 static void set_speex_filters()
00094 {
00095         int i;
00096         float f;
00097 
00098         if(!st)
00099                 return;
00100 
00101         i = 1; /* always make VAD decision */
00102         speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_VAD, &i);
00103         i = (iaxci_filters & IAXC_FILTER_AGC) ? 1 : 0;
00104         speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_AGC, &i);
00105         i = (iaxci_filters & IAXC_FILTER_DENOISE) ? 1 : 0;
00106         speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_DENOISE, &i);
00107 
00108         /* make vad more sensitive */
00109         f = 0.30f;
00110         speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_PROB_START, &f);
00111         f = 0.07f;
00112         speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_PROB_CONTINUE, &f);
00113 }
00114 
/*
 * Update a smoothed level from one buffer of 16-bit samples.
 *
 * The largest absolute sample value in audio[0..len-1] is scaled by
 * 1/32767 and *level is moved one fifth of the way towards it. With no
 * samples (len <= 0) the peak is 0, so *level decays towards zero.
 */
static void calculate_level(short *audio, int len, float *level)
{
        int peak = 0;
        int n = 0;

        while ( n < len )
        {
                int magnitude = audio[n];

                if ( magnitude < 0 )
                        magnitude = -magnitude;
                if ( magnitude > peak )
                        peak = magnitude;
                n++;
        }

        *level += ((float)peak / 32767.0f - *level) / 5.0f;
}
00129 
00130 static int input_postprocess(void *audio, int len, int rate)
00131 {
00132         static float lowest_volume = 1.0f;
00133         float volume;
00134         int silent = 0;
00135 
00136         if ( !st || speex_state_size != len || speex_state_rate != rate )
00137         {
00138                 if (st)
00139                         speex_preprocess_state_destroy(st);
00140                 st = speex_preprocess_state_init(len,rate);
00141                 speex_state_size = len;
00142                 speex_state_rate = rate;
00143                 set_speex_filters();
00144         }
00145 
00146         calculate_level((short *)audio, len, &input_level);
00147 
00148         /* only preprocess if we're interested in VAD, AGC, or DENOISE */
00149         if ( (iaxci_filters & (IAXC_FILTER_DENOISE | IAXC_FILTER_AGC)) ||
00150                         iaxci_silence_threshold > 0.0f )
00151                 silent = !speex_preprocess(st, (spx_int16_t *)audio, NULL);
00152 
00153         /* Analog AGC: Bring speex AGC gain out to mixer, with lots of hysteresis */
00154         /* use a higher continuation threshold for AAGC than for VAD itself */
00155         if ( !silent &&
00156                         iaxci_silence_threshold != 0.0f &&
00157                         (iaxci_filters & IAXC_FILTER_AGC) &&
00158                         (iaxci_filters & IAXC_FILTER_AAGC) &&
00159                         st->speech_prob > 0.20f )
00160         {
00161                 static int i = 0;
00162 
00163                 i++;
00164 
00165                 if ( (i & 0x3f) == 0 )
00166                 {
00167                         const float loudness = st->loudness2;
00168 
00169                         if ( loudness > 8000.0f || loudness < 4000.0f )
00170                         {
00171                                 const float level = iaxc_input_level_get();
00172 
00173                                 if ( loudness > 16000.0f && level > 0.5f )
00174                                 {
00175                                         /* lower quickly if we're really too hot */
00176                                         iaxc_input_level_set(level - 0.2f);
00177                                 }
00178                                 else if ( loudness > 8000.0f && level >= 0.15f )
00179                                 {
00180                                         /* lower less quickly if we're a bit too hot */
00181                                         iaxc_input_level_set(level - 0.1f);
00182                                 }
00183                                 else if ( loudness < 4000.0f && level <= 0.9f )
00184                                 {
00185                                         /* raise slowly if we're cold */
00186                                         iaxc_input_level_set(level + 0.1f);
00187                                 }
00188                         }
00189                 }
00190         }
00191 
00192         /* This is ugly. Basically just don't get volume level if speex thought
00193          * we were silent. Just set it to 0 in that case */
00194         if ( iaxci_silence_threshold > 0.0f && silent )
00195                 input_level = 0.0f;
00196 
00197         do_level_callback();
00198 
00199         volume = vol_to_db(input_level);
00200 
00201         if ( volume < lowest_volume )
00202                 lowest_volume = volume;
00203 
00204         if ( iaxci_silence_threshold > 0.0f )
00205                 return silent;
00206         else
00207                 return volume < iaxci_silence_threshold;
00208 }
00209 
00210 static int output_postprocess(void *audio, int len)
00211 {
00212         calculate_level((short *)audio, len, &output_level);
00213 
00214         do_level_callback();
00215 
00216         return 0;
00217 }
00218 
00219 static struct iaxc_audio_codec *create_codec(int format)
00220 {
00221         switch (format & IAXC_AUDIO_FORMAT_MASK)
00222         {
00223 #ifdef CODEC_GSM
00224         case IAXC_FORMAT_GSM:
00225                 return codec_audio_gsm_new();
00226 #endif
00227         case IAXC_FORMAT_ULAW:
00228                 return codec_audio_ulaw_new();
00229         case IAXC_FORMAT_ALAW:
00230                 return codec_audio_alaw_new();
00231         case IAXC_FORMAT_SPEEX:
00232                 return codec_audio_speex_new(&speex_settings);
00233 #ifdef CODEC_ILBC
00234         case IAXC_FORMAT_ILBC:
00235                 return codec_audio_ilbc_new();
00236 #endif
00237         default:
00238                 /* ERROR: codec not supported */
00239                 fprintf(stderr, "ERROR: Codec not supported: %d\n", format);
00240                 return NULL;
00241         }
00242 }
00243 
00244 EXPORT void iaxc_set_speex_settings(int decode_enhance, float quality,
00245                 int bitrate, int vbr, int abr, int complexity)
00246 {
00247         speex_settings.decode_enhance = decode_enhance;
00248         speex_settings.quality = quality;
00249         speex_settings.bitrate = bitrate;
00250         speex_settings.vbr = vbr;
00251         speex_settings.abr = abr;
00252         speex_settings.complexity = complexity;
00253 }
00254 
00255 int audio_send_encoded_audio(struct iaxc_call *call, int callNo, void *data,
00256                 int format, int samples)
00257 {
00258         unsigned char outbuf[1024];
00259         int outsize = 1024;
00260         int silent;
00261         int insize = samples;
00262 
00263         /* update last input timestamp */
00264         gettimeofday( &timeLastInput, NULL ) ;
00265 
00266         silent = input_postprocess(data, insize, 8000);
00267 
00268         if(silent)
00269         {
00270                 if(!call->tx_silent)
00271                 {  /* send a Comfort Noise Frame */
00272                         call->tx_silent = 1;
00273                         if ( iaxci_filters & IAXC_FILTER_CN )
00274                                 iax_send_cng(call->session, 10, NULL, 0);
00275                 }
00276                 return 0;  /* poof! no encoding! */
00277         }
00278 
00279         /* we're going to send voice now */
00280         call->tx_silent = 0;
00281 
00282         /* destroy encoder if it is incorrect type */
00283         if(call->encoder && call->encoder->format != format)
00284         {
00285                 call->encoder->destroy(call->encoder);
00286                 call->encoder = NULL;
00287         }
00288 
00289         /* just break early if there's no format defined: this happens for the
00290          * first couple of frames of new calls */
00291         if(format == 0) return 0;
00292 
00293         /* create encoder if necessary */
00294         if(!call->encoder)
00295         {
00296                 call->encoder = create_codec(format);
00297         }
00298 
00299         if(!call->encoder)
00300         {
00301                 /* ERROR: no codec */
00302                 fprintf(stderr, "ERROR: Codec could not be created: %d\n", format);
00303                 return 0;
00304         }
00305 
00306         if(call->encoder->encode(call->encoder, &insize, (short *)data,
00307                                 &outsize, outbuf))
00308         {
00309                 /* ERROR: codec error */
00310                 fprintf(stderr, "ERROR: encode error: %d\n", format);
00311                 return 0;
00312         }
00313 
00314         if(samples-insize == 0)
00315         {
00316                 fprintf(stderr, "ERROR encoding (no samples output (samples=%d)\n", samples);
00317                 return -1;
00318         }
00319 
00320         // Send the encoded audio data back to the app if required
00321         // TODO: fix the stupid way in which the encoded audio size is returned
00322         if ( iaxc_get_audio_prefs() & IAXC_AUDIO_PREF_RECV_LOCAL_ENCODED )
00323                 iaxci_do_audio_callback(callNo, 0, IAXC_SOURCE_LOCAL, 1,
00324                                 call->encoder->format & IAXC_AUDIO_FORMAT_MASK,
00325                                 sizeof(outbuf) - outsize, outbuf);
00326 
00327         if(iax_send_voice(call->session,format, outbuf,
00328                                 sizeof(outbuf) - outsize, samples-insize) == -1)
00329         {
00330                 puts("Failed to send voice!");
00331                 return -1;
00332         }
00333 
00334         return 0;
00335 }
00336 
00337 /* decode encoded audio; return the number of bytes decoded
00338  * negative indicates error */
00339 int audio_decode_audio(struct iaxc_call * call, void * out, void * data, int len,
00340                 int format, int * samples)
00341 {
00342         int insize = len;
00343         int outsize = *samples;
00344 
00345         gettimeofday( &timeLastOutput, NULL ) ;
00346 
00347         if ( format == 0 )
00348         {
00349                 fprintf(stderr, "audio_decode_audio: Format is zero (should't happen)!\n");
00350                 return -1;
00351         }
00352 
00353         /* destroy decoder if it is incorrect type */
00354         if ( call->decoder && call->decoder->format != format )
00355         {
00356                 call->decoder->destroy(call->decoder);
00357                 call->decoder = NULL;
00358         }
00359 
00360         /* create decoder if necessary */
00361         if ( !call->decoder )
00362         {
00363                 call->decoder = create_codec(format);
00364         }
00365 
00366         if ( !call->decoder )
00367         {
00368                 fprintf(stderr, "ERROR: Codec could not be created: %d\n",
00369                                 format);
00370                 return -1;
00371         }
00372 
00373         if ( call->decoder->decode(call->decoder,
00374                                 &insize, (unsigned char *)data,
00375                                 &outsize, (short *)out) )
00376         {
00377                 fprintf(stderr, "ERROR: decode error: %d\n", format);
00378                 return -1;
00379         }
00380 
00381         output_postprocess(out, *samples - outsize);
00382 
00383         *samples = outsize;
00384         return len - insize;
00385 }
00386 
00387 EXPORT int iaxc_get_filters(void)
00388 {
00389         return iaxci_filters;
00390 }
00391 
00392 EXPORT void iaxc_set_filters(int filters)
00393 {
00394         iaxci_filters = filters;
00395         set_speex_filters();
00396 }
00397 
00398 EXPORT void iaxc_set_silence_threshold(float thr)
00399 {
00400         iaxci_silence_threshold = thr;
00401         set_speex_filters();
00402 }
00403 

Generated on Mon Sep 24 15:43:29 2007 for IAXClient by  doxygen 1.5.3