nmpp/fft__old_8h_source.html

//***************************************************************************/

//*                     RC Module Inc., Moscow, Russia                      */

//*                     NeuroMatrix(r) NM6403 Software                      */

//*                                                                         */

//*   Fast Fourier Transform Library                                        */

//*  (C-callable functions)                                                 */

//*                                                                         */

//*  $Workfile:: sFFT.h                                                    $*/

//*  Contents:         Header file of FFT routines                          */

//*                                                                         */

//*                                                                         */

//*  Author:     S.Mushkaev                                                 */

//*                                                                         */

//*  Version         1.0                                                    */

//*  Start    Date:  03.07.2001                                             */

//*  Release $Date: 2005/07/13 14:19:56 $                                   */

//*                                                                         */

//*                                                                         */

//***************************************************************************/


// LIBRARY  nmfft.lib


#ifndef _SFFT_H_INCLUDED_

#define _SFFT_H_INCLUDED_


#include "nmtype.h"


/* \mainpage Introduction

 *

 * \htmlinclude intro.html

 * \section intro_sec Introduction

 *

 * This is the introduction.

 *

 * \section install_sec Installation

 *

 * \subsection step1 Step 1: Opening the box

 *

 * etc...


 */


//*****************************************************************************


//*****************************************************************************


/*

#ifndef _NMCMPLX_H_INCLUDED_

    struct nm32sc

    {

        int re;//Real;

        int im;//Imag;

        nm32sc():

            re(0),im(0){}; //Real(0),Imag(0){}

        nm32sc(int _Real,int _Imag):

            re(_Real),im(_Imag){};//Real(_Real),Imag(_Imag){}

    };

#endif //_NMCMPLX_H_INCLUDED_

*/

//#include "nmpp.h"


// The functions listed below are forward and inverse FFT routines for

// 256-, 512-, 1024- or 2048-point complex data, represented as arrays of nm32sc type.

// Each complex number is stored in a 64-bit word.

// The lower 32 bits hold the real part of the complex number.

// The higher 32 bits hold the imaginary part of the complex number.

// The admissible input range of the data depends on the dimension of the array,

// the calculation accuracy mode and the on/off mode of intermediate and final scaling down (shift normalization) of results.

// This range guarantees against overflow during the calculation process.

// The table of ranges can be found in the "FFT Library Programmer's manual".

//

// The calculation accuracy mode specifies how the sine-cosine coefficients are represented in fixed-point format.

// When the 7-bit accuracy mode is used, the accuracy of the output shape approaches the maximum,

// but the output is reduced by around 2%.

// If the 6-bit accuracy mode is used, the output range corresponds to the result of a Fourier transform

// based on floating-point arithmetic, but the output is less precise.

// The calculation accuracy mode may be set or switched by the appropriate ***Set6bit() or ***Set7bit() function.

// NOTE: The accuracy setting function must be called at least once before the FFT routine is executed.

#include "malloc32.h"


#ifdef __cplusplus

        extern "C" {

#endif

//=============================  Forward FFT 256 =====================================


// Accuracy selectors for the forward 256-point FFT (FFT_Fwd256).
// One of them must be called at least once before the FFT routine is executed;
// calling the other one later switches the mode.
// Per the header notes: 7-bit mode gives the best output shape accuracy but the
// output is reduced by around 2%; 6-bit mode keeps the output range of a
// floating-point transform but is less precise.
void FFT_Fwd256Set6bit();   // Selects 6-bit accuracy of the sine-cosine coefficients

void FFT_Fwd256Set7bit();   // Selects 7-bit accuracy of the sine-cosine coefficients


//                  The performance of the FFT_Fwd256 routine depends on memory allocation for

//                      input,output and temporary buffers.

//                      For the maximum speed performance it is recommended

//                      to use the following configuration:

//                          GSrcBuffer: Global SRAM

//                          LDstBuffer: Local  SRAM

//                          LBuffer   : Local  SRAM

//                          GBuffer   : Global SRAM

//

//                      For this configuration the following results were achieved:

//                          3994 clocks - full operation (0.1 ms at 40MHz CPU)

//                          3662 clocks - without final normalization (0.092ms at 40MHz CPU)

//

//                      If you are not going to use this routine as a C callable function,

//                      you can reduce the number of instructions removing all stack operations. In this case

//                      the total execution time can be reduced by around 50 clocks.


// Forward 256-point FFT over complex data stored as nm32sc.
// The caller supplies the source, the destination and two temporary buffers;
// element counts are given in the per-parameter comments.
// For maximum speed the notes above recommend GSrcBuffer and GBuffer in Global SRAM
// and LDstBuffer and LBuffer in Local SRAM.
// An accuracy mode must have been selected with FFT_Fwd256Set6bit() or
// FFT_Fwd256Set7bit() at least once before this routine is executed.
void  FFT_Fwd256(

            nm32sc* GSrcBuffer,     // Source buffer :long[256]

            nm32sc* LDstBuffer,     // Result FFT    :long[256]

            void*       LBuffer,    // Temp buffer   :long[256*3]

            void*       GBuffer,    // Temp buffer   :long[256*2]

            int         ShiftR=-1   // Shift normalization; the default -1 means ShiftR=14 at 7-bit precision and ShiftR=12 at 6-bit precision

            );

#include "time.h"

#include "malloc32.h"


//  int  nmppsFFT256FwdInitAlloc(Malloc32Func* allocate,  Free32Func* free, NmppsFFTSpec* spec);

//  void nmppsFFT256FwdOptimize(void* src, void* dst, uint64* allocOrder);

//  void nmppsFFT256Fwd(nm32sc* src, nm32sc* dst, NmppsFFTSpec* spec);

//

//  void nmppsFFTFree(NmppsFFTSpec* spec );


   /*

struct s_fft_fwd256_settings {

    int8x8* dataSinCos0;

    int     sizeSinCos0; // in int64

    int     bitsSinCos0;

    int     shift0;

    int8x8* dataSinCos1;

    int     sizeSinCos1; // in int64

    int     bitsSinCos1;

    int     shift1;

};*/


 /*


s_fft_fwd256_settings s_fft256_default={0,1,1,1,0,1,1,1};


void FFT_Fwd256_ (

    int32x2* pSrc,

    int32x2* pDst,

    int64* tmp0,

    int64* tmp1,

    s_fft_fwd256_settings* s=&s_fft256_default

);

*/


// Accuracy selectors for the inverse 256-point FFT (FFT_Inv256).
// One of them must be called at least once before the FFT routine is executed.
void FFT_Inv256Set6bit();   // Selects 6-bit accuracy of the sine-cosine coefficients


void FFT_Inv256Set7bit();   // Selects 7-bit accuracy of the sine-cosine coefficients


// Inverse 256-point FFT over complex data stored as nm32sc.
// The caller supplies the source, the destination and two temporary buffers;
// element counts are given in the per-parameter comments.
// Normalization is applied in two right-shift steps: ShiftR1 (intermediate)
// and ShiftR2 (final).
// An accuracy mode must have been selected with FFT_Inv256Set6bit() or
// FFT_Inv256Set7bit() at least once before this routine is executed.
void  FFT_Inv256(

            nm32sc* GSrcBuffer, // Source buffer :long[256]

            nm32sc* GDstBuffer, // Result FFT    :long[256]

            void*       LBuffer,    // Temp buffer   :long[256*3]

            void*       GBuffer,    // Temp buffer   :long[256*3]

            int         ShiftR1=8,  // Intermediate shift normalization

            int         ShiftR2=-1  // Final shift normalization;

                                    // the default -1 means ShiftR2=14 at 7-bit precision

                                    //               and ShiftR2=12 at 6-bit precision

            );


//================================= FFT 512 =================================================

// Accuracy selectors for the forward 512-point FFT (FFT_Fwd512).
// One of them must be called at least once before the FFT routine is executed.
void FFT_Fwd512Set6bit();// Selects 6-bit accuracy of the sine-cosine coefficients

void FFT_Fwd512Set7bit();// Selects 7-bit accuracy of the sine-cosine coefficients


// Forward 512-point FFT over complex data stored as nm32sc.
// The caller supplies the source, the destination and two temporary buffers;
// element counts are given in the per-parameter comments.
// For maximum speed the notes below recommend GSrcBuffer and GBuffer in Global SRAM
// and GDstBuffer and LBuffer in Local SRAM.
// An accuracy mode must have been selected with FFT_Fwd512Set6bit() or
// FFT_Fwd512Set7bit() at least once before this routine is executed.
void  FFT_Fwd512(

            nm32sc* GSrcBuffer, // Source buffer :long[512]

            nm32sc* GDstBuffer, // Result FFT    :long[512]

            void*       LBuffer,    // Temp buffer   :long[512*3]

            void*       GBuffer,    // Temp buffer   :long[512*3]

            int         ShiftR=-1   // Right shift normalization; presumably -1 selects a precision-dependent default as documented for FFT_Fwd256 - TODO confirm

            );

//                  \en The performance of the FFT_Fwd512 routine depends on memory allocation for

//                      input,output and temporary buffers.

//                      For the maximum speed performance it is recommended

//                      to use the following configuration:

//                          GSrcBuffer: Global SRAM

//                          GDstBuffer: Local  SRAM

//                          LBuffer   : Local  SRAM

//                          GBuffer   : Global SRAM

//

//                      For this configuration the following results were achieved:

//                          8766 clocks - full operation        (0.22 ms at 40MHz CPU)

//                          8180 clocks - without normalization (0.2 ms at 40MHz CPU)

//

//                      If you are not going to use this routine as a C callable function,

//                      you can reduce the number of instructions removing all stack operations. In this case

//                      the total execution time can be reduced by around 50 clocks.


//======================================= Inversed FFT 512 ============================================

// Accuracy selectors for the inverse 512-point FFT (FFT_Inv512).
// One of them must be called at least once before the FFT routine is executed.
void FFT_Inv512Set6bit();// Selects 6-bit accuracy of the sine-cosine coefficients

void FFT_Inv512Set7bit();// Selects 7-bit accuracy of the sine-cosine coefficients


// Inverse 512-point FFT over complex data stored as nm32sc.
// The caller supplies the source, the destination and two temporary buffers;
// element counts are given in the per-parameter comments.
// Normalization is applied in two right-shift steps: ShiftR1 (first) and ShiftR2 (final).
// The performance notes that follow this declaration recommend GSrcBuffer and GBuffer
// in Global SRAM and LDstBuffer and LBuffer in Local SRAM.
// An accuracy mode must have been selected with FFT_Inv512Set6bit() or
// FFT_Inv512Set7bit() at least once before this routine is executed.
void  FFT_Inv512(

            nm32sc* GSrcBuffer, // Source buffer :long[512]

            nm32sc* LDstBuffer, // Result FFT    :long[512]

            void*       LBuffer,    // Temp buffer   :long[512*3]

            void*       GBuffer,    // Temp buffer   :long[512*3]

            int         ShiftR1=9,  // First shift normalization

            int         ShiftR2=-1  // Final shift normalization; presumably -1 selects a precision-dependent default as documented for FFT_Inv256 - TODO confirm

            );

//          \en The performance of the FFT_Inv512 routine depends on memory allocation for

//                  input,output and temporary buffers.

//                  For the maximum speed performance it is recommended

//                  to use the following configuration:

//                      GSrcBuffer: Global SRAM

//                      LDstBuffer: Local  SRAM

//                      LBuffer   : Local  SRAM

//                      GBuffer   : Global SRAM

//

//                  For this configuration the following results were achieved:

//                      9407 clocks - full operation (0.24ms at 40MHz CPU)

//                      8199 clocks - without normalization (0.2ms at 40Mhz CPU)

//

//                  If you are not going to use this routine as a C callable function,

//                  you can reduce the number of instructions removing all stack operations. In this case

//                  the total execution time can be reduced by around 50 clocks.


//========================================= FFT1024 ==================================================

    // Accuracy selectors for the forward 1024-point FFT (FFT_Fwd1024).
    // One of them must be called at least once before the FFT routine is executed.
    void FFT_Fwd1024Set6bit();// Selects 6-bit accuracy of the sine-cosine coefficients

    void FFT_Fwd1024Set7bit();// Selects 7-bit accuracy of the sine-cosine coefficients


// Forward 1024-point FFT over complex data stored as nm32sc.
// The caller supplies the source, the destination and two temporary buffers;
// element counts are given in the per-parameter comments.
// For maximum speed the notes below recommend GSrcBuffer and GBuffer in Global SRAM
// and LDstBuffer and LBuffer in Local SRAM.
// An accuracy mode must have been selected with FFT_Fwd1024Set6bit() or
// FFT_Fwd1024Set7bit() at least once before this routine is executed.
void  FFT_Fwd1024(

            nm32sc* GSrcBuffer, // Source buffer :long[1024]

            nm32sc* LDstBuffer, // Result FFT    :long[1024]

            void*       LBuffer,    // Temp buffer   :long[1024*3]

            void*       GBuffer,    // Temp buffer   :long[1024]

            int         ShiftR=-1   // Right shift normalization; presumably -1 selects a precision-dependent default as documented for FFT_Fwd256 - TODO confirm

            );

//          \en The performance of the FFT_Fwd1024 routine depends on memory allocation for

//                  input,output and temporary buffers.

//                  For the maximum speed performance it is recommended

//                  to use the following configuration:

//                      GSrcBuffer: Global SRAM

//                      LDstBuffer: Local  SRAM

//                      LBuffer   : Local  SRAM

//                      GBuffer   : Global SRAM

//

//                  For this configuration the following results were achieved:

//                      20041 clocks - full operation       (0.5ms at 40MHz CPU)

//                      18900 clocks - without normalization (0.47ms at 40MHz CPU)

//

//                  If you are not going to use this routine as a C callable function,

//                  you can reduce the number of instructions removing all stack operations. In this case

//                  the total execution time can be reduced by around 50 clocks.


//============================================= Inversed FFT 1024 =======================================

    // Accuracy selectors for the inverse 1024-point FFT (FFT_Inv1024).
    // One of them must be called at least once before the FFT routine is executed.
    void FFT_Inv1024Set6bit();// Selects 6-bit accuracy of the sine-cosine coefficients

void FFT_Inv1024Set7bit();// Selects 7-bit accuracy of the sine-cosine coefficients


// Inverse 1024-point FFT over complex data stored as nm32sc.
// The caller supplies the source, the destination and two temporary buffers;
// element counts are given in the per-parameter comments.
// Normalization is applied in two right-shift steps: ShiftR1 (first) and ShiftR2 (final).
// An accuracy mode must have been selected with FFT_Inv1024Set6bit() or
// FFT_Inv1024Set7bit() at least once before this routine is executed.
// NOTE(review): the performance notes that follow this declaration are titled
// FFT_Fwd2048 and repeat that routine's clock figures; they may not describe
// FFT_Inv1024 - confirm against the library manual.
void  FFT_Inv1024(

            nm32sc* GSrcBuffer, // Source buffer :long[1024]

            nm32sc* GDstBuffer, // Result FFT    :long[1024]

            void*       LBuffer,    // Temp buffer   :long[1024*3]

            void*       GBuffer,    // Temp buffer   :long[1024*3]

            int         ShiftR1=10, // First right shift normalization

            int         ShiftR2=-1  // Final right shift normalization; presumably -1 selects a precision-dependent default as documented for FFT_Inv256 - TODO confirm

            );


//          \en The performance of the FFT_Fwd2048 routine depends on memory allocation for

//                  input,output and temporary buffers.

//                  For the maximum speed performance it is recommended

//                  to use the following configuration:

//                      GSrcBuffer: Global SRAM

//                      LDstBuffer: Local  SRAM

//                      LBuffer   : Local  SRAM

//

//                  For this configuration the following results were achieved:

//                      49800 clocks - full operation (1.25ms at 40 MHz CPU)

//                      47624 clocks - without normalization (1.2ms at 40 MHz CPU)

//

//                  If you are not going to use this routine as a C callable function,

//                  you can reduce the number of instructions removing all stack operations. In this case

//                  the total execution time can be reduced by around 50 clocks.


//============================================ FFT2048 ============================================

    // Forward FFT 2048

// Accuracy selectors for the forward 2048-point FFT (FFT_Fwd2048).
// One of them must be called at least once before the FFT routine is executed.
void FFT_Fwd2048Set6bit();// Selects 6-bit accuracy of the sine-cosine coefficients

void FFT_Fwd2048Set7bit();// Selects 7-bit accuracy of the sine-cosine coefficients


// Forward 2048-point FFT over complex data stored as nm32sc.
// Unlike the smaller sizes, this routine takes a single temporary buffer (LBuffer)
// and has no GBuffer parameter; element counts are given in the per-parameter comments.
// An accuracy mode must have been selected with FFT_Fwd2048Set6bit() or
// FFT_Fwd2048Set7bit() at least once before this routine is executed.
// NOTE(review): the performance notes below list "LDstBuffer: Local SRAM" although
// the destination parameter here is named GDstBuffer - confirm the intended placement.
void  FFT_Fwd2048(

            nm32sc* GSrcBuffer, // Source buffer :long[2048]

            nm32sc* GDstBuffer, // Result FFT    :long[2048]

            void*       LBuffer,    // Temp buffer   :long[2048*4]

            int         ShiftR=-1   // Right shift normalization; presumably -1 selects a precision-dependent default as documented for FFT_Fwd256 - TODO confirm

            );

//          \en The performance of the FFT_Fwd2048 routine depends on memory allocation for

//                  input,output and temporary buffers.

//                  For the maximum speed performance it is recommended

//                  to use the following configuration:

//                      GSrcBuffer: Global SRAM

//                      LDstBuffer: Local  SRAM

//                      LBuffer   : Local  SRAM

//

//                  For this configuration the following results were achieved:

//                      49800 clocks - full operation (1.25ms at 40 MHz CPU)

//                      47624 clocks - without normalization (1.2ms at 40 MHz CPU)

//

//                  If you are not going to use this routine as a C callable function,

//                  you can reduce the number of instructions removing all stack operations. In this case

//                  the total execution time can be reduced by around 50 clocks.


//=========================================== Inversed FFT 2048 ==================================

    // Accuracy selectors for the inverse 2048-point FFT (FFT_Inv2048).
    // One of them must be called at least once before the FFT routine is executed.
    void FFT_Inv2048Set6bit();// Selects 6-bit accuracy of the sine-cosine coefficients

    void FFT_Inv2048Set7bit();// Selects 7-bit accuracy of the sine-cosine coefficients


// Inverse 2048-point FFT over complex data stored as nm32sc.
// The caller supplies the source, the destination and two temporary buffers;
// element counts are given in the per-parameter comments.
// Normalization is applied in two right-shift steps: ShiftR1 (first) and ShiftR2 (final).
// The performance notes that follow this declaration recommend GSrcBuffer and GBuffer
// in Global SRAM and LDstBuffer and LBuffer in Local SRAM.
// An accuracy mode must have been selected with FFT_Inv2048Set6bit() or
// FFT_Inv2048Set7bit() at least once before this routine is executed.
void  FFT_Inv2048(

            nm32sc* GSrcBuffer, // Source buffer :long[2048]

            nm32sc* LDstBuffer, // Result FFT    :long[2048]

            void*       LBuffer,    // Temp buffer   :long[2048*4]

            void*       GBuffer,    // Temp buffer   :long[2048*4]

            int         ShiftR1=11, // First right shift normalization

            int         ShiftR2=-1  // Final right shift normalization; presumably -1 selects a precision-dependent default as documented for FFT_Inv256 - TODO confirm

            );

//          \en The performance of the FFT_Inv2048 routine depends on memory allocation for

//                  input,output and temporary buffers.

//                  For the maximum speed performance it is recommended

//                  to use the following configuration:

//                      GSrcBuffer: Global SRAM

//                      LDstBuffer: Local SRAM

//                      LBuffer   : Local  SRAM

//                      GBuffer   : Global SRAM

//

//                  For this configuration the following results were achieved:

//                      52160 clocks - full operation (1.3 ms at 40MHz CPU)

//                      47780 clocks - without both normalizations (1.2ms at 40MHz CPU)

//

//                  If you are not going to use this routine as a C callable function,

//                  you can reduce the number of instructions removing all stack operations. In this case

//                  the total execution time can be reduced by around 50 clocks.

//#include "nmfft.h"


//============================================ FFT4096 ============================================

    // Forward FFT 4096


// Forward 4096-point FFT over complex data stored as nm32sc.
// The caller supplies the source, the destination and two temporary buffers;
// element counts are given in the per-parameter comments.
// This declaration has no shift-normalization parameter, and no Set6bit/Set7bit
// accuracy selectors for this size are declared alongside it.
void FFT_Fwd4096(

            nm32sc* GSrcBuffer,     // Source buffer :long[4096]

            nm32sc* GDstBuffer,     // Result FFT    :long[4096]

            void*       LBuffer,    // Temp buffer   :long[4096*2]

            void*       GBuffer     // Temp buffer   :long[4096*3]

            );


//=========================================== Inversed FFT 4096 ==================================

// Inverse 4096-point FFT over complex data stored as nm32sc.
// The caller supplies the source, the destination and two temporary buffers;
// element counts are given in the per-parameter comments.
// This declaration has no shift-normalization parameters, and no Set6bit/Set7bit
// accuracy selectors for this size are declared alongside it.
void FFT_Inv4096(

            nm32sc* GSrcBuffer,     // Source buffer :long[4096]

            nm32sc* GDstBuffer,     // Result FFT    :long[4096]

            void*       LBuffer,    // Temp buffer   :long[4096*2]

            void*       GBuffer     // Temp buffer   :long[4096*3]

            );


//============================================ FFT8192 ============================================

    // Forward FFT 8192


// Forward 8192-point FFT over complex data stored as nm32sc.
// The caller supplies the source, the destination and two temporary buffers;
// element counts are given in the per-parameter comments.
// Note that the source parameter is named LSrcBuffer here, whereas the smaller
// transforms name it GSrcBuffer.
// This declaration has no shift-normalization parameter, and no Set6bit/Set7bit
// accuracy selectors for this size are declared alongside it.
void FFT_Fwd8192(

            nm32sc* LSrcBuffer,     // Source buffer :long[8192]

            nm32sc* GDstBuffer,     // Result FFT    :long[8192]

            void*       LBuffer,    // Temp buffer   :long[8192]

            void*       GBuffer     // Temp buffer   :long[8192*3]

            );


//=========================================== Inversed FFT 8192 ==================================

// Inverse 8192-point FFT over complex data stored as nm32sc.
// The caller supplies the source, the destination and two temporary buffers;
// element counts are given in the per-parameter comments.
// Note that the source parameter is named LSrcBuffer here, whereas the smaller
// transforms name it GSrcBuffer.
// This declaration has no shift-normalization parameters, and no Set6bit/Set7bit
// accuracy selectors for this size are declared alongside it.
void FFT_Inv8192(

            nm32sc* LSrcBuffer,     // Source buffer :long[8192]

            nm32sc* GDstBuffer,     // Result FFT    :long[8192]

            void*       LBuffer,    // Temp buffer   :long[8192]

            void*       GBuffer     // Temp buffer   :long[8192*3]

            );


#ifdef __cplusplus

        };

#endif


#endif

FFT_Fwd1024Set6bit
void FFT_Fwd1024Set6bit()
Устанавливает 6-битную точность вычислений

FFT_Fwd1024Set7bit
void FFT_Fwd1024Set7bit()
Устанавливает 7-битную точность вычислений

FFT_Fwd1024
void FFT_Fwd1024(nm32sc *GSrcBuffer, nm32sc *LDstBuffer, void *LBuffer, void *GBuffer, int ShiftR=-1)
Прямое быстрое преобразование Фурье-1024.

FFT_Fwd2048Set7bit
void FFT_Fwd2048Set7bit()
Устанавливает 7-битную точность вычислений

FFT_Fwd2048Set6bit
void FFT_Fwd2048Set6bit()
Устанавливает 6-битную точность вычислений

FFT_Fwd2048
void FFT_Fwd2048(nm32sc *GSrcBuffer, nm32sc *GDstBuffer, void *LBuffer, int ShiftR=-1)
Прямое быстрое преобразование Фурье-2048.

FFT_Fwd256
void FFT_Fwd256(nm32sc *GSrcBuffer, nm32sc *LDstBuffer, void *LBuffer, void *GBuffer, int ShiftR=-1)
Прямое быстрое преобразование Фурье-256.

FFT_Fwd256Set7bit
void FFT_Fwd256Set7bit()
Устанавливает 7-битную точность вычислений

FFT_Fwd256Set6bit
void FFT_Fwd256Set6bit()
Устанавливает 6-битную точность вычислений

FFT_Fwd4096
void FFT_Fwd4096(nm32sc *GSrcBuffer, nm32sc *GDstBuffer, void *LBuffer, void *GBuffer)
Прямое быстрое преобразование Фурье-4096.

FFT_Fwd512Set7bit
void FFT_Fwd512Set7bit()
Устанавливает 7-битную точность вычислений

FFT_Fwd512
void FFT_Fwd512(nm32sc *GSrcBuffer, nm32sc *GDstBuffer, void *LBuffer, void *GBuffer, int ShiftR=-1)
Прямое быстрое преобразование Фурье-512.

FFT_Fwd512Set6bit
void FFT_Fwd512Set6bit()
Устанавливает 6-битную точность вычислений

FFT_Fwd8192
void FFT_Fwd8192(nm32sc *LSrcBuffer, nm32sc *GDstBuffer, void *LBuffer, void *GBuffer)
Прямое быстрое преобразование Фурье-8192.

FFT_Inv1024Set6bit
void FFT_Inv1024Set6bit()
Устанавливает 6-битную точность вычислений

FFT_Inv1024
void FFT_Inv1024(nm32sc *GSrcBuffer, nm32sc *GDstBuffer, void *LBuffer, void *GBuffer, int ShiftR1=10, int ShiftR2=-1)
Обратное быстрое преобразование Фурье. ОБПФ-1024.

FFT_Inv1024Set7bit
void FFT_Inv1024Set7bit()
Устанавливает 7-битную точность вычислений

FFT_Inv2048Set7bit
void FFT_Inv2048Set7bit()
Устанавливает 7-битную точность вычислений

FFT_Inv2048
void FFT_Inv2048(nm32sc *GSrcBuffer, nm32sc *LDstBuffer, void *LBuffer, void *GBuffer, int ShiftR1=11, int ShiftR2=-1)
Обратное быстрое преобразование Фурье. ОБПФ-2048.

FFT_Inv2048Set6bit
void FFT_Inv2048Set6bit()
Устанавливает 6-битную точность вычислений

FFT_Inv256Set6bit
void FFT_Inv256Set6bit()
Устанавливает 6-битную точность вычислений

FFT_Inv256
void FFT_Inv256(nm32sc *GSrcBuffer, nm32sc *GDstBuffer, void *LBuffer, void *GBuffer, int ShiftR1=8, int ShiftR2=-1)
Обратное быстрое преобразование Фурье. ОБПФ-256.

FFT_Inv256Set7bit
void FFT_Inv256Set7bit()
Устанавливает 7-битную точность вычислений

FFT_Inv4096
void FFT_Inv4096(nm32sc *GSrcBuffer, nm32sc *GDstBuffer, void *LBuffer, void *GBuffer)
Обратное быстрое преобразование Фурье. ОБПФ-4096.

FFT_Inv512Set6bit
void FFT_Inv512Set6bit()
Устанавливает 6-битную точность вычислений

FFT_Inv512
void FFT_Inv512(nm32sc *GSrcBuffer, nm32sc *LDstBuffer, void *LBuffer, void *GBuffer, int ShiftR1=9, int ShiftR2=-1)
Обратное быстрое преобразование Фурье. ОБПФ-512.

FFT_Inv512Set7bit
void FFT_Inv512Set7bit()
Устанавливает 7-битную точность вычислений

FFT_Inv8192
void FFT_Inv8192(nm32sc *LSrcBuffer, nm32sc *GDstBuffer, void *LBuffer, void *GBuffer)
Обратное быстрое преобразование Фурье. ОБПФ-8192.

nmtype.h

s_nm32sc
Definition: nmtype.h:1301