spandsp  0.0.6
fast_convert.h
1 /*
2  * SpanDSP - a series of DSP components for telephony
3  *
4  * fast_convert.h - Quick ways to convert floating point numbers to integers
5  *
6  * Written by Steve Underwood <steveu@coppice.org>
7  *
8  * Copyright (C) 2009 Steve Underwood
9  *
10  * All rights reserved.
11  *
12  * This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU Lesser General Public License version 2.1,
14  * as published by the Free Software Foundation.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  * GNU Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with this program; if not, write to the Free Software
23  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24  */
25 
26 #if !defined(_SPANDSP_FAST_CONVERT_H_)
27 #define _SPANDSP_FAST_CONVERT_H_
28 
29 #if defined(__cplusplus)
30 extern "C"
31 {
32 #endif
33 
34 /* The following code, to handle issues with lrint() and lrintf() on various
35  * platforms, is adapted from similar code in libsndfile, which is:
36  *
37  * Copyright (C) 2001-2004 Erik de Castro Lopo <erikd@mega-nerd.com>
38  *
39  * This program is free software; you can redistribute it and/or modify
40  * it under the terms of the GNU Lesser General Public License as published by
41  * the Free Software Foundation; either version 2.1 of the License, or
42  * (at your option) any later version.
43  *
44  * This program is distributed in the hope that it will be useful,
45  * but WITHOUT ANY WARRANTY; without even the implied warranty of
46  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
47  * GNU Lesser General Public License for more details.
48  */
49 
50 /*
51  * On Intel Pentium processors (especially PIII and probably P4), converting
52  * from float to int is very slow. To meet the C specs, the code produced by
53  * most C compilers targeting Pentium needs to change the FPU rounding mode
54  * before the float to int conversion is performed.
55  *
56  * Changing the FPU rounding mode causes the FPU pipeline to be flushed. It
57  * is this flushing of the pipeline which is so slow.
58  *
59  * Fortunately the ISO C99 specification defines the functions lrint, lrintf,
60  * llrint and llrintf which fix this problem as a side effect.
61  *
62  * On Unix-like systems, the configure process should have detected the
63  * presence of these functions. If they weren't found we have to replace them
64  * here with a standard C cast.
65  */
66 
67 /*
68  * The C99 prototypes for these functions are as follows:
69  *
70  * float rintf(float x);
71  * double rint(double x);
72  * long int lrintf(float x);
73  * long int lrint(double x);
74  * long long int llrintf(float x);
75  * long long int llrint(double x);
76  *
77  * The presence of the required functions is detected during the configure
78  * process and the values HAVE_LRINT and HAVE_LRINTF are set accordingly in
79  * the config file.
80  */
81 
82 #if defined(__CYGWIN__)
83 #if !defined(__cplusplus) && (__GNUC__ < 4)
84  /*
85  * CYGWIN versions prior to 1.7.1 have lrint and lrintf functions, but
86  * they are slow and buggy:
87  * http://sourceware.org/ml/cygwin/2005-06/msg00153.html
88  * http://sourceware.org/ml/cygwin/2005-09/msg00047.html
89  * These replacement functions (pulled from the Public Domain MinGW
90  * math.h header) replace the native versions.
91  */
/* Replacement lrint() for old Cygwin toolchains: converts the double x to an
 * integer with the x87 FISTP instruction and returns it as a long int.
 * FISTP rounds according to the FPU's current rounding mode, unlike a C cast,
 * which truncates toward zero. */
92  static __inline__ long int lrint(double x)
93  {
94  long int retval;
95 
96  __asm__ __volatile__
97  (
98  "fistpl %0" /* store ST(0) as a 32-bit integer into retval, then pop the x87 stack */
99  : "=m" (retval) /* output: written straight to memory */
100  : "t" (x) /* input: x is placed on the top of the x87 stack */
101  : "st" /* clobber: the asm pops the stack top, so the compiler must be told */
102  );
103 
104  return retval;
105  }
106 
/* Replacement lrintf() for old Cygwin toolchains: converts the float x to an
 * integer with the x87 FISTP instruction and returns it as a long int.
 * FISTP rounds according to the FPU's current rounding mode, unlike a C cast,
 * which truncates toward zero. */
107  static __inline__ long int lrintf(float x)
108  {
109  long int retval;
110 
111  __asm__ __volatile__
112  (
113  "fistpl %0" /* store ST(0) as a 32-bit integer into retval, then pop the x87 stack */
114  : "=m" (retval) /* output: written straight to memory */
115  : "t" (x) /* input: x is placed on the top of the x87 stack */
116  : "st" /* clobber: the asm pops the stack top, so the compiler must be told */
117  );
118  return retval;
119  }
120 #endif
121 
122  /* The fastest way to convert is the equivalent of lrint() */
/*
 * Fast double-to-integer conversion for Cygwin, using the x87 FISTP
 * instruction.
 *
 * x       The value to convert.
 * return  x converted to an integer using the FPU's current rounding mode
 *         (not the truncation a C cast performs).
 */
static __inline__ long int lfastrint(double x)
{
    /* "fistpl" always stores exactly 32 bits, so the destination must be a
       32-bit object. Storing into a long int would leave the upper half
       unwritten wherever long is 64 bits wide (e.g. 64-bit Cygwin). The int
       is sign-extended to long int on return. */
    int rounded;

    __asm__ __volatile__
    (
        "fistpl %0"         /* store ST(0) as a 32-bit integer, then pop */
        : "=m" (rounded)    /* output: written straight to memory */
        : "t" (x)           /* input: x on the top of the x87 stack */
        : "st"              /* clobber: the stack top is popped by the asm */
    );

    return rounded;
}
137 
/*
 * Fast float-to-integer conversion for Cygwin, using the x87 FISTP
 * instruction.
 *
 * x       The value to convert.
 * return  x converted to an integer using the FPU's current rounding mode
 *         (not the truncation a C cast performs).
 */
static __inline__ long int lfastrintf(float x)
{
    /* "fistpl" always stores exactly 32 bits, so the destination must be a
       32-bit object. Storing into a long int would leave the upper half
       unwritten wherever long is 64 bits wide (e.g. 64-bit Cygwin). The int
       is sign-extended to long int on return. */
    int rounded;

    __asm__ __volatile__
    (
        "fistpl %0"         /* store ST(0) as a 32-bit integer, then pop */
        : "=m" (rounded)    /* output: written straight to memory */
        : "t" (x)           /* input: x on the top of the x87 stack */
        : "st"              /* clobber: the stack top is popped by the asm */
    );

    return rounded;
}
151 #elif defined(__GNUC__) || (__SUNPRO_C >= 0x0590)
152 
153 #if defined(__i386__)
154  /* These routines are guaranteed fast on an i386 machine. Using the built in
155  lrint() and lrintf() should be similar, but they may not always be enabled.
156  Sometimes, especially with "-O0", you might get slow calls to routines. */
/* i386 fast conversion of the double x to a long int using the x87 FISTP
 * instruction. The result is rounded according to the FPU's current rounding
 * mode, rather than truncated as a C cast would be. */
157  static __inline__ long int lfastrint(double x)
158  {
159  long int retval;
160 
161  __asm__ __volatile__
162  (
163  "fistpl %0" /* store ST(0) as a 32-bit integer into retval, then pop the x87 stack */
164  : "=m" (retval) /* output: written straight to memory */
165  : "t" (x) /* input: x is placed on the top of the x87 stack */
166  : "st" /* clobber: the asm pops the stack top, so the compiler must be told */
167  );
168 
169  return retval;
170  }
171 
/* i386 fast conversion of the float x to a long int using the x87 FISTP
 * instruction. The result is rounded according to the FPU's current rounding
 * mode, rather than truncated as a C cast would be. */
172  static __inline__ long int lfastrintf(float x)
173  {
174  long int retval;
175 
176  __asm__ __volatile__
177  (
178  "fistpl %0" /* store ST(0) as a 32-bit integer into retval, then pop the x87 stack */
179  : "=m" (retval) /* output: written straight to memory */
180  : "t" (x) /* input: x is placed on the top of the x87 stack */
181  : "st" /* clobber: the asm pops the stack top, so the compiler must be told */
182  );
183  return retval;
184  }
185 #elif defined(__x86_64__)
186  /* On an x86_64 machine, the fastest thing seems to be a pure assignment from a
187  double or float to an int. It looks like the design on the x86_64 took account
188  of the default behaviour specified for C. */
/*
 * x86_64 fast conversion of a double to a long int.
 * Uses a plain C conversion, so the fractional part is discarded
 * (truncation toward zero) rather than rounded.
 */
static __inline__ long int lfastrint(double x)
{
    const long int truncated = (long int) x;

    return truncated;
}
193 
/*
 * x86_64 fast conversion of a float to a long int.
 * Uses a plain C conversion, so the fractional part is discarded
 * (truncation toward zero) rather than rounded.
 */
static __inline__ long int lfastrintf(float x)
{
    const long int truncated = (long int) x;

    return truncated;
}
198 #elif (defined(__ppc__) || defined(__powerpc__)) && !defined(__NO_FPRS__)
/* PowerPC fast conversion of the double x to a long int.
 * "fctiw" converts the value in the floating-point register to a 32-bit
 * integer, and "stfd" then stores the whole 64-bit register into res[]; the
 * function returns the second 32-bit word of that store.
 * NOTE(review): returning res[1] presumably relies on big-endian word order,
 * where the integer result lands in the second word - confirm before using
 * this on a little-endian PowerPC target.
 * NOTE(review): fctiw writes to operand %1, which is declared as input-only;
 * confirm the compiler cannot rely on x still being intact afterwards. */
199  static __inline__ long int lfastrint(register double x)
200  {
201  int res[2];
202 
203  __asm__ __volatile__
204  (
205  "fctiw %1, %1\n\t"
206  "stfd %1, %0"
207  : "=m" (res) /* Output */
208  : "f" (x) /* Input */
209  : "memory"
210  );
211 
212  return res[1];
213  }
214 
/* PowerPC fast conversion of the float x to a long int.
 * "fctiw" converts the value in the floating-point register to a 32-bit
 * integer, and "stfd" then stores the whole 64-bit register into res[]; the
 * function returns the second 32-bit word of that store.
 * NOTE(review): returning res[1] presumably relies on big-endian word order,
 * where the integer result lands in the second word - confirm before using
 * this on a little-endian PowerPC target.
 * NOTE(review): fctiw writes to operand %1, which is declared as input-only;
 * confirm the compiler cannot rely on x still being intact afterwards. */
215  static __inline__ long int lfastrintf(register float x)
216  {
217  int res[2];
218 
219  __asm__ __volatile__
220  (
221  "fctiw %1, %1\n\t"
222  "stfd %1, %0"
223  : "=m" (res) /* Output */
224  : "f" (x) /* Input */
225  : "memory"
226  );
227 
228  return res[1];
229  }
230 #else
231  /* Fallback routines, for unrecognised platforms */
/*
 * Generic fallback: convert a double to a long int with an ordinary C
 * conversion. The fractional part is discarded (truncation toward zero).
 */
static __inline__ long int lfastrint(double x)
{
    long int whole;

    whole = (long int) x;
    return whole;
}
236 
/*
 * Generic fallback: convert a float to a long int with an ordinary C
 * conversion. The fractional part is discarded (truncation toward zero).
 */
static __inline__ long int lfastrintf(float x)
{
    long int whole;

    whole = (long int) x;
    return whole;
}
241 #endif
242 
243 #elif defined(_M_IX86)
244  /* Visual Studio i386 */
245  /*
246  * Win32 doesn't seem to have the lrint() and lrintf() functions.
247  * Therefore implement inline versions of these functions here.
248  */
249 
250 #if (_MSC_VER < 1800)
/* lrint() substitute for Visual Studio on 32-bit x86 (compiled only when
 * _MSC_VER < 1800). Pushes the double x onto the x87 stack with "fld", then
 * "fistp" stores it into i as an integer and pops the stack. The conversion
 * follows the FPU's current rounding mode instead of truncating. */
251  __inline long int lrint(double x)
252  {
253  long int i;
254 
255  _asm
256  {
257  fld x
258  fistp i
259  };
260  return i;
261  }
262 
/* lrintf() substitute for Visual Studio on 32-bit x86 (compiled only when
 * _MSC_VER < 1800). Pushes the float x onto the x87 stack with "fld", then
 * "fistp" stores it into i as an integer and pops the stack. The conversion
 * follows the FPU's current rounding mode instead of truncating. */
263  __inline long int lrintf(float x)
264  {
265  long int i;
266 
267  _asm
268  {
269  fld x
270  fistp i
271  };
272  return i;
273  }
274 
/* rintf() substitute for Visual Studio on 32-bit x86 (compiled only when
 * _MSC_VER < 1800). Loads flt onto the x87 stack and rounds it to an integral
 * value in place with "frndint", using the FPU's current rounding mode.
 * There is deliberately no C return statement: the rounded value is left in
 * ST(0), which is where 32-bit x86 calling conventions expect a
 * floating-point return value. */
275  __inline float rintf(float flt)
276  {
277  _asm
278  {
279  fld flt
280  frndint
281  }
282  }
283 
/* rint() substitute for Visual Studio on 32-bit x86 (compiled only when
 * _MSC_VER < 1800). Loads dbl onto the x87 stack and rounds it to an integral
 * value in place with "frndint", using the FPU's current rounding mode.
 * There is deliberately no C return statement: the rounded value is left in
 * ST(0), which is where 32-bit x86 calling conventions expect a
 * floating-point return value. */
284  __inline double rint(double dbl)
285  {
286  _asm
287  {
288  fld dbl
289  frndint
290  }
291  }
292 #endif
293 
/* Fast double-to-long conversion for Visual Studio on 32-bit x86. Pushes x
 * onto the x87 stack with "fld", then "fistp" stores it into i as an integer
 * and pops the stack. The conversion follows the FPU's current rounding mode
 * instead of truncating as a C cast would. */
294  __inline long int lfastrint(double x)
295  {
296  long int i;
297 
298  _asm
299  {
300  fld x
301  fistp i
302  };
303  return i;
304  }
305 
/* Fast float-to-long conversion for Visual Studio on 32-bit x86. Pushes x
 * onto the x87 stack with "fld", then "fistp" stores it into i as an integer
 * and pops the stack. The conversion follows the FPU's current rounding mode
 * instead of truncating as a C cast would. */
306  __inline long int lfastrintf(float x)
307  {
308  long int i;
309 
310  _asm
311  {
312  fld x
313  fistp i
314  };
315  return i;
316  }
317 #elif defined(_M_X64)
318  /* Visual Studio x86_64 */
319  /* x86_64 machines will do best with a simple assignment. */
320 #include <intrin.h>
321 
322 #if (_MSC_VER < 1800)
/*
 * lrint() substitute for Visual Studio on x86_64 (compiled only when
 * _MSC_VER < 1800).
 *
 * x       The value to convert.
 * return  x converted to an integer by the SSE2 scalar conversion, which
 *         rounds according to the current MXCSR rounding mode.
 */
__inline long int lrint(double x)
{
    /* _mm_load_sd reads only the single double at &x. The previous
       _mm_loadu_pd loaded two doubles (16 bytes) from an 8-byte parameter,
       reading past the end of the object. */
    return (long int) _mm_cvtsd_si64x(_mm_load_sd(&x));
}
327 
/*
 * lrintf() substitute for Visual Studio on x86_64 (compiled only when
 * _MSC_VER < 1800). Loads x into the low lane of an SSE register and converts
 * it to an integer with the SSE scalar conversion intrinsic.
 */
__inline long int lrintf(float x)
{
    const __m128 low_lane = _mm_load_ss(&x);

    return _mm_cvt_ss2si(low_lane);
}
332 #endif
333 
/*
 * Fast double-to-long conversion for Visual Studio on x86_64.
 * Uses a plain C conversion, so the fractional part is discarded
 * (truncation toward zero) rather than rounded.
 */
__inline long int lfastrint(double x)
{
    const long int truncated = (long int) x;

    return truncated;
}
338 
/*
 * Fast float-to-long conversion for Visual Studio on x86_64.
 * Uses a plain C conversion, so the fractional part is discarded
 * (truncation toward zero) rather than rounded.
 */
__inline long int lfastrintf(float x)
{
    const long int truncated = (long int) x;

    return truncated;
}
343 #elif defined(__MWERKS__) && defined(macintosh)
344  /* This MacOS 9 solution was provided by Stephane Letz */
345 
/* Fast double-to-long conversion for Metrowerks CodeWarrior on classic
 * Mac OS. "fctiw" converts x in its floating-point register to a 32-bit
 * integer, "stfd" stores the 64-bit register into res[], and the second
 * element of res[] is returned.
 * NOTE(review): this presumably relies on long int being 32 bits and on
 * big-endian word order so that the integer lands in res[1] - confirm. */
346  long int __inline__ lfastrint(register double x)
347  {
348  long int res[2];
349 
350  asm
351  {
352  fctiw x, x
353  stfd x, res
354  }
355  return res[1];
356  }
357 
/* Fast float-to-long conversion for Metrowerks CodeWarrior on classic
 * Mac OS. "fctiw" converts x in its floating-point register to a 32-bit
 * integer, "stfd" stores the 64-bit register into res[], and the second
 * element of res[] is returned.
 * NOTE(review): this presumably relies on long int being 32 bits and on
 * big-endian word order so that the integer lands in res[1] - confirm. */
358  long int __inline__ lfastrintf(register float x)
359  {
360  long int res[2];
361 
362  asm
363  {
364  fctiw x, x
365  stfd x, res
366  }
367  return res[1];
368  }
369 #elif defined(__MACH__) && defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__))
370  /* For Apple Mac OS/X - do recent versions still need this? */
371 
/* Fast double-to-long conversion for PowerPC Mac OS X.
 * "fctiw" converts the value in the floating-point register to a 32-bit
 * integer, and "stfd" then stores the whole 64-bit register into res[]; the
 * function returns the second 32-bit word of that store.
 * NOTE(review): returning res[1] presumably relies on big-endian word order,
 * where the integer result lands in the second word - confirm.
 * NOTE(review): fctiw writes to operand %1, which is declared as input-only;
 * confirm the compiler cannot rely on x still being intact afterwards. */
372  static __inline__ long int lfastrint(register double x)
373  {
374  int res[2];
375 
376  __asm__ __volatile__
377  (
378  "fctiw %1, %1\n\t"
379  "stfd %1, %0"
380  : "=m" (res) /* Output */
381  : "f" (x) /* Input */
382  : "memory"
383  );
384 
385  return res[1];
386  }
387 
/* Fast float-to-long conversion for PowerPC Mac OS X.
 * "fctiw" converts the value in the floating-point register to a 32-bit
 * integer, and "stfd" then stores the whole 64-bit register into res[]; the
 * function returns the second 32-bit word of that store.
 * NOTE(review): returning res[1] presumably relies on big-endian word order,
 * where the integer result lands in the second word - confirm.
 * NOTE(review): fctiw writes to operand %1, which is declared as input-only;
 * confirm the compiler cannot rely on x still being intact afterwards. */
388  static __inline__ long int lfastrintf(register float x)
389  {
390  int res[2];
391 
392  __asm__ __volatile__
393  (
394  "fctiw %1, %1\n\t"
395  "stfd %1, %0"
396  : "=m" (res) /* Output */
397  : "f" (x) /* Input */
398  : "memory"
399  );
400 
401  return res[1];
402  }
403 #else
404  /* There is nothing else to do, but use a simple casting operation, instead of a real
405  rint() type function. Since we are only trying to use rint() to speed up conversions,
406  the accuracy issues related to changing the rounding scheme are of little concern
407  to us. */
408 
409  #if !defined(__sgi) && !defined(__sunos) && !defined(__solaris) && !defined(__sun)
410  #warning "No usable lrint() and lrintf() functions available."
411  #warning "Replacing these functions with a simple C cast."
412  #endif
413 
/*
 * Last-resort lrint() stand-in for platforms with no usable native version.
 * This is a plain C conversion: the fractional part is discarded (truncation
 * toward zero), so it does NOT round the way the C99 lrint() does.
 */
static __inline__ long int lrint(double x)
{
    long int whole;

    whole = (long int) x;
    return whole;
}
418 
/*
 * Last-resort lrintf() stand-in for platforms with no usable native version.
 * This is a plain C conversion: the fractional part is discarded (truncation
 * toward zero), so it does NOT round the way the C99 lrintf() does.
 */
static __inline__ long int lrintf(float x)
{
    long int whole;

    whole = (long int) x;
    return whole;
}
423 
/*
 * Last-resort fast conversion of a double to a long int, using a plain C
 * conversion. The fractional part is discarded (truncation toward zero).
 */
static __inline__ long int lfastrint(double x)
{
    const long int truncated = (long int) x;

    return truncated;
}
428 
/*
 * Last-resort fast conversion of a float to a long int, using a plain C
 * conversion. The fractional part is discarded (truncation toward zero).
 */
static __inline__ long int lfastrintf(float x)
{
    const long int truncated = (long int) x;

    return truncated;
}
433 #endif
434 
435 #if defined(__cplusplus)
436 }
437 #endif
438 
439 #endif
440 
441 /*- End of file ------------------------------------------------------------*/