nmpp
fft_old.h
1//***************************************************************************/
2//* RC Module Inc., Moscow, Russia */
3//* NeuroMatrix(r) NM6403 Software */
4//* */
5//* Fast Fourier Transform Library */
6//* (C-callable functions) */
7//* */
8//* $Workfile:: sFFT.h $*/
9//* Contents: Header file of FFT routines */
10//* */
11//* */
12//* Author: S.Mushkaev */
13//* */
14//* Version 1.0 */
15//* Start Date: 03.07.2001 */
16//* Release $Date: 2005/07/13 14:19:56 $ */
17//* */
18//* */
19//***************************************************************************/
20
21// LIBRARY nmfft.lib
22
23
24
25#ifndef _SFFT_H_INCLUDED_
26#define _SFFT_H_INCLUDED_
27
28
29#include "nmtype.h"
30
31
32/* \mainpage Введение
33 *
34 * \htmlinclude intro.html
35 * \section intro_sec Introduction
36 *
37 * This is the introduction.
38 *
39 * \section install_sec Installation
40 *
41 * \subsection step1 Step 1: Opening the box
42 *
43 * etc...
44
45 */
46
47
48
49//*****************************************************************************
50
51
52
68//*****************************************************************************
69
70
71/*
72#ifndef _NMCMPLX_H_INCLUDED_
73 struct nm32sc
74 {
75 int re;//Real;
76 int im;//Imag;
77 nm32sc():
78 re(0),im(0){}; //Real(0),Imag(0){}
79 nm32sc(int _Real,int _Imag):
80 re(_Real),im(_Imag){};//Real(_Real),Imag(_Imag){}
81 };
82#endif //_NMCMPLX_H_INCLUDED_
83*/
84//#include "nmpp.h"
85
87// The functions listed below are forward and inverse FFT routines for
88// 256-, 512-, 1024- or 2048-point complex data, represented as arrays of nm32sc type.
89// Each complex number is stored in a 64-bit word.
90// The lower 32 bits hold the real part of the complex number.
91// The higher 32 bits hold the imaginary part of the complex number.
92// The admissible input range of the data depends on the dimension of the array,
93// the calculation accuracy mode, and whether intermediate and final scaling down (shift normalization) of the results is on or off.
94// Keeping the input within this range guarantees that no overflow occurs during the calculation.
95// The table of ranges can be found in the "FFT Library Programmer's manual".
96//
97// The calculation accuracy mode determines how the sine-cosine coefficients are represented in fixed-point format.
98// When the 7-bit accuracy mode is used, the accuracy of the output shape approaches the maximum,
99// but the output is reduced by around 2%.
100// If the 6-bit accuracy mode is used, the output range corresponds to the result of a Fourier transform
101// based on floating-point arithmetic, but the output is less precise.
102// The calculation accuracy mode may be set or switched by the appropriate ***Set6bit() or ***Set7bit() function.
103// NOTE: The accuracy-setting function must be called at least once before an FFT routine is executed.
104#include "malloc32.h"
105
106#ifdef __cplusplus
107 extern "C" {
108#endif
109//============================= Forward FFT 256 =====================================
110
111
112
113
// Accuracy-mode selectors for the forward 256-point FFT. Per the notes in this
// header, an accuracy-setting function must be called at least once before the
// FFT routine is executed.
126void FFT_Fwd256Set6bit(); // Selects 6-bit fixed-point sine-cosine coefficients
134void FFT_Fwd256Set7bit(); // Selects 7-bit fixed-point sine-cosine coefficients
135
136
137
138// The performance of the FFT_Fwd256 routine depends on memory allocation for
139// input,output and temporary buffers.
140// For the maximum speed performance it is recommended
141// to use the following configuration:
142// GSrcBuffer: Global SRAM
143// LDstBuffer: Local SRAM
144// LBuffer : Local SRAM
145// GBuffer : Global SRAM
146//
147// For this configuration the following results were achieved:
148// 3994 clocks - full operation (0.1 ms at 40MHz CPU)
149// 3662 clocks - without final normalization (0.092ms at 40MHz CPU)
150//
151// If you are not going to use this routine as a C callable function,
152// you can reduce the number of instructions removing all stack operations. In this case
153// the total execution time can be reduced by around 50 clocks.
154
155
156
157
252 nm32sc* GSrcBuffer, // Source buffer :long[256]
253 nm32sc* LDstBuffer, // Result FFT :long[256]
254 void* LBuffer, // Temp buffer :long[256*3]
255 void* GBuffer, // Temp buffer :long[256*2]
256 int ShiftR=-1 // Shift normalization by default it means ShiftR=14 at 7 bit precision and ShiftR=12 at 6 bit precision
257 );
258#include "time.h"
259#include "malloc32.h"
260
261
262
263
264
265
266// int nmppsFFT256FwdInitAlloc(Malloc32Func* allocate, Free32Func* free, NmppsFFTSpec* spec);
267// void nmppsFFT256FwdOptimize(void* src, void* dst, uint64* allocOrder);
268// void nmppsFFT256Fwd(nm32sc* src, nm32sc* dst, NmppsFFTSpec* spec);
269//
270// void nmppsFFTFree(NmppsFFTSpec* spec );
271
272
273
274 /*
275struct s_fft_fwd256_settings {
276 int8x8* dataSinCos0;
277 int sizeSinCos0; // in int64
278 int bitsSinCos0;
279 int shift0;
280 int8x8* dataSinCos1;
281 int sizeSinCos1; // in int64
282 int bitsSinCos1;
283 int shift1;
284};*/
285
286 /*
287
288s_fft_fwd256_settings s_fft256_default={0,1,1,1,0,1,1,1};
289
290void FFT_Fwd256_ (
291 int32x2* pSrc,
292 int32x2* pDst,
293 int64* tmp0,
294 int64* tmp1,
295 s_fft_fwd256_settings* s=&s_fft256_default
296);
297*/
298
300
// Accuracy-mode selectors for the inverse 256-point FFT. Per the notes in this
// header, an accuracy-setting function must be called at least once before the
// FFT routine is executed.
312void FFT_Inv256Set6bit(); // Selects 6-bit fixed-point sine-cosine coefficients

321void FFT_Inv256Set7bit(); // Selects 7-bit fixed-point sine-cosine coefficients
322
447 nm32sc* GSrcBuffer, // Source buffer :long[256]
448 nm32sc* GDstBuffer, // Result FFT :long[256]
449 void* LBuffer, // Temp buffer :long[256*3]
450 void* GBuffer, // Temp buffer :long[256*3]
451 int ShiftR1=8, // Intermediate shift normalization
452 int ShiftR2=-1 // Final shift normalization
453 // by default it means ShiftR2=14 at 7 bit precision
454 // and ShiftR2=12 at 6 bit precision
455 );
456
457
458//================================= FFT 512 =================================================
// Accuracy-mode selectors for the forward 512-point FFT. Per the notes in this
// header, an accuracy-setting function must be called at least once before the
// FFT routine is executed.
472void FFT_Fwd512Set6bit();// Selects 6-bit fixed-point sine-cosine coefficients
480void FFT_Fwd512Set7bit();// Selects 7-bit fixed-point sine-cosine coefficients
481
482
578 nm32sc* GSrcBuffer, // Source buffer :long[512]
579 nm32sc* GDstBuffer, // Result FFT :long[512]
580 void* LBuffer, // Temp buffer :long[512*3]
581 void* GBuffer, // Temp buffer :long[512*3]
582 int ShiftR=-1 // Right shift normalization
583 );
584// \en The performance of the FFT_Fwd512 routine depends on memory allocation for
585// input,output and temporary buffers.
586// For the maximum speed performance it is recommended
587// to use the following configuration:
588// GSrcBuffer: Global SRAM
589// GDstBuffer: Local SRAM
590// LBuffer : Local SRAM
591// GBuffer : Global SRAM
592//
593// For this configuration the following results were achieved:
594// 8766 clocks - full operation (0.22 ms at 40MHz CPU)
595// 8180 clocks - without normalization (0.2 ms at 40MHz CPU)
596//
597// If you are not going to use this routine as a C callable function,
598// you can reduce the number of instructions removing all stack operations. In this case
599// the total execution time can be reduced by around 50 clocks.
600
601//======================================= Inversed FFT 512 ============================================
// Accuracy-mode selectors for the inverse 512-point FFT. Per the notes in this
// header, an accuracy-setting function must be called at least once before the
// FFT routine is executed.
614void FFT_Inv512Set6bit();// Selects 6-bit fixed-point sine-cosine coefficients
622void FFT_Inv512Set7bit();// Selects 7-bit fixed-point sine-cosine coefficients
623
746 nm32sc* GSrcBuffer, // Source buffer :long[512]
747 nm32sc* LDstBuffer, // Result FFT :long[512]
748 void* LBuffer, // Temp buffer :long[512*3]
749 void* GBuffer, // Temp buffer :long[512*3]
750 int ShiftR1=9, // First shift normalization
751 int ShiftR2=-1 // Final shift normalization
752 );
753// \en The performance of the FFT_Inv512 routine depends on memory allocation for
754// input,output and temporary buffers.
755// For the maximum speed performance it is recommended
756// to use the following configuration:
757// GSrcBuffer: Global SRAM
758// LDstBuffer: Local SRAM
759// LBuffer : Local SRAM
760// GBuffer : Global SRAM
761//
762// For this configuration the following results were achieved:
763// 9407 clocks - full operation (0.24ms at 40MHz CPU)
764// 8199 clocks - without normalization (0.2ms at 40Mhz CPU)
765//
766// If you are not going to use this routine as a C callable function,
767// you can reduce the number of instructions removing all stack operations. In this case
768// the total execution time can be reduced by around 50 clocks.
769
770
771//========================================= FFT1024 ==================================================
// Accuracy-mode selectors for the forward 1024-point FFT. Per the notes in this
// header, an accuracy-setting function must be called at least once before the
// FFT routine is executed.
784 void FFT_Fwd1024Set6bit();// Selects 6-bit fixed-point sine-cosine coefficients
793 void FFT_Fwd1024Set7bit();// Selects 7-bit fixed-point sine-cosine coefficients
794
887 nm32sc* GSrcBuffer, // Source buffer :long[1024]
888 nm32sc* LDstBuffer, // Result FFT :long[1024]
889 void* LBuffer, // Temp buffer :long[1024*3]
890 void* GBuffer, // Temp buffer :long[1024]
891 int ShiftR=-1 // Right shift normalization
892 );
893// \en The performance of the FFT_Fwd1024 routine depends on memory allocation for
894// input,output and temporary buffers.
895// For the maximum speed performance it is recommended
896// to use the following configuration:
897// GSrcBuffer: Global SRAM
898// LDstBuffer: Local SRAM
899// LBuffer : Local SRAM
900// GBuffer : Global SRAM
901//
902// For this configuration the following results were achieved:
903// 20041 clocks - full operation (0.5ms at 40MHz CPU)
904// 18900 clocks - without normalization (0.47ms at 40MHz CPU)
905//
906// If you are not going to use this routine as a C callable function,
907// you can reduce the number of instructions removing all stack operations. In this case
908// the total execution time can be reduced by around 50 clocks.
909
910//============================================= Inversed FFT 1024 =======================================
// Accuracy-mode selectors for the inverse 1024-point FFT. Per the notes in this
// header, an accuracy-setting function must be called at least once before the
// FFT routine is executed.
923 void FFT_Inv1024Set6bit();// Selects 6-bit fixed-point sine-cosine coefficients
931void FFT_Inv1024Set7bit();// Selects 7-bit fixed-point sine-cosine coefficients
932
1057 nm32sc* GSrcBuffer, // Source buffer :long[1024]
1058 nm32sc* GDstBuffer, // Result FFT :long[1024]
1059 void* LBuffer, // Temp buffer :long[1024*3]
1060 void* GBuffer, // Temp buffer :long[1024*3]
1061 int ShiftR1=10, // First Right shift normalization
1062 int ShiftR2=-1 // Final Right shift normalization
1063 );
1064
1065// \en The performance of the FFT_Fwd2048 routine depends on memory allocation for
1066// input,output and temporary buffers.
1067// For the maximum speed performance it is recommended
1068// to use the following configuration:
1069// GSrcBuffer: Global SRAM
1070// LDstBuffer: Local SRAM
1071// LBuffer : Local SRAM
1072//
1073// For this configuration the following results were achieved:
1074// 49800 clocks - full operation (1.25ms at 40 MHz CPU)
1075// 47624 clocks - without normalization (1.2ms at 40 MHz CPU)
1076//
1077// If you are not going to use this routine as a C callable function,
1078// you can reduce the number of instructions removing all stack operations. In this case
1079// the total execution time can be reduced by around 50 clocks.
1080
1081//============================================ FFT2048 ============================================
1087 // Forward FFT 2048
// Accuracy-mode selectors for the forward 2048-point FFT. Per the notes in this
// header, an accuracy-setting function must be called at least once before the
// FFT routine is executed.
1095void FFT_Fwd2048Set6bit();// Selects 6-bit fixed-point sine-cosine coefficients
1103void FFT_Fwd2048Set7bit();// Selects 7-bit fixed-point sine-cosine coefficients
1104
1178 nm32sc* GSrcBuffer, // Source buffer :long[2048]
1179 nm32sc* GDstBuffer, // Result FFT :long[2048]
1180 void* LBuffer, // Temp buffer :long[2048*4]
1181 int ShiftR=-1 // Right shift normalization
1182 );
1183// \en The performance of the FFT_Fwd2048 routine depends on memory allocation for
1184// input,output and temporary buffers.
1185// For the maximum speed performance it is recommended
1186// to use the following configuration:
1187// GSrcBuffer: Global SRAM
1188// LDstBuffer: Local SRAM
1189// LBuffer : Local SRAM
1190//
1191// For this configuration the following results were achieved:
1192// 49800 clocks - full operation (1.25ms at 40 MHz CPU)
1193// 47624 clocks - without normalization (1.2ms at 40 MHz CPU)
1194//
1195// If you are not going to use this routine as a C callable function,
1196// you can reduce the number of instructions removing all stack operations. In this case
1197// the total execution time can be reduced by around 50 clocks.
1198
1199//=========================================== Inversed FFT 2048 ==================================
// Accuracy-mode selectors for the inverse 2048-point FFT. Per the notes in this
// header, an accuracy-setting function must be called at least once before the
// FFT routine is executed.
1212 void FFT_Inv2048Set6bit();// Selects 6-bit fixed-point sine-cosine coefficients
1220 void FFT_Inv2048Set7bit();// Selects 7-bit fixed-point sine-cosine coefficients
1221
1345 nm32sc* GSrcBuffer, // Source buffer :long[2048]
1346 nm32sc* LDstBuffer, // Result FFT :long[2048]
1347 void* LBuffer, // Temp buffer :long[2048*4]
1348 void* GBuffer, // Temp buffer :long[2048*4]
1349 int ShiftR1=11, // First Right shift normalization
1350 int ShiftR2=-1 // Final Right shift normalization
1351 );
1352// \en The performance of the FFT_Inv2048 routine depends on memory allocation for
1353// input,output and temporary buffers.
1354// For the maximum speed performance it is recommended
1355// to use the following configuration:
1356// GSrcBuffer: Global SRAM
1357// LDstBuffer: Local SRAM
1358// LBuffer : Local SRAM
1359// GBuffer : Global SRAM
1360//
1361// For this configuration the following results were achieved:
1362// 52160 clocks - full operation (1.3 ms at 40MHz CPU)
1363// 47780 clocks - without both normalizations (1.2ms at 40MHz CPU)
1364//
1365// If you are not going to use this routine as a C callable function,
1366// you can reduce the number of instructions removing all stack operations. In this case
1367// the total execution time can be reduced by around 50 clocks.#include "nmfft.h"
1368
1369//============================================ FFT4096 ============================================
1375 // Forward FFT 4096
1376
1430 nm32sc* GSrcBuffer, // Source buffer :long[4096]
1431 nm32sc* GDstBuffer, // Result FFT :long[4096]
1432 void* LBuffer, // Temp buffer :long[4096*2]
1433 void* GBuffer // Temp buffer :long[4096*3]
1434 );
1435
1436//=========================================== Inversed FFT 4096 ==================================
1495 nm32sc* GSrcBuffer, // Source buffer :long[4096]
1496 nm32sc* GDstBuffer, // Result FFT :long[4096]
1497 void* LBuffer, // Temp buffer :long[4096*2]
1498 void* GBuffer // Temp buffer :long[4096*3]
1499 );
1500
1501//============================================ FFT8192 ============================================
1507 // Forward FFT 8192
1508
1564 nm32sc* LSrcBuffer, // Source buffer :long[8192]
1565 nm32sc* GDstBuffer, // Result FFT :long[8192]
1566 void* LBuffer, // Temp buffer :long[8192]
1567 void* GBuffer // Temp buffer :long[8192*3]
1568 );
1569
1570//=========================================== Inversed FFT 8192 ==================================
1630 nm32sc* LSrcBuffer, // Source buffer :long[8192]
1631 nm32sc* GDstBuffer, // Result FFT :long[8192]
1632 void* LBuffer, // Temp buffer :long[8192]
1633 void* GBuffer // Temp buffer :long[8192*3]
1634 );
1635
1636
1637
1638#ifdef __cplusplus
1639 };
1640#endif
1641
1642#endif
void FFT_Fwd1024Set6bit()
Устанавливает 6-битную точность вычислений
void FFT_Fwd1024Set7bit()
Устанавливает 7-битную точность вычислений
void FFT_Fwd1024(nm32sc *GSrcBuffer, nm32sc *LDstBuffer, void *LBuffer, void *GBuffer, int ShiftR=-1)
Прямое быстрое преобразование Фурье-1024.
void FFT_Fwd2048Set7bit()
Устанавливает 7-битную точность вычислений
void FFT_Fwd2048Set6bit()
Устанавливает 6-битную точность вычислений
void FFT_Fwd2048(nm32sc *GSrcBuffer, nm32sc *GDstBuffer, void *LBuffer, int ShiftR=-1)
Прямое быстрое преобразование Фурье-2048.
void FFT_Fwd256(nm32sc *GSrcBuffer, nm32sc *LDstBuffer, void *LBuffer, void *GBuffer, int ShiftR=-1)
Прямое быстрое преобразование Фурье-256.
void FFT_Fwd256Set7bit()
Устанавливает 7-битную точность вычислений
void FFT_Fwd256Set6bit()
Устанавливает 6-битную точность вычислений
void FFT_Fwd4096(nm32sc *GSrcBuffer, nm32sc *GDstBuffer, void *LBuffer, void *GBuffer)
Прямое быстрое преобразование Фурье-4096.
void FFT_Fwd512Set7bit()
Устанавливает 7-битную точность вычислений
void FFT_Fwd512(nm32sc *GSrcBuffer, nm32sc *GDstBuffer, void *LBuffer, void *GBuffer, int ShiftR=-1)
Прямое быстрое преобразование Фурье-512.
void FFT_Fwd512Set6bit()
Устанавливает 6-битную точность вычислений
void FFT_Fwd8192(nm32sc *LSrcBuffer, nm32sc *GDstBuffer, void *LBuffer, void *GBuffer)
Прямое быстрое преобразование Фурье-8192.
void FFT_Inv1024Set6bit()
Устанавливает 6-битную точность вычислений
void FFT_Inv1024(nm32sc *GSrcBuffer, nm32sc *GDstBuffer, void *LBuffer, void *GBuffer, int ShiftR1=10, int ShiftR2=-1)
Обратное быстрое преобразование Фурье. ОБПФ-1024.
void FFT_Inv1024Set7bit()
Устанавливает 7-битную точность вычислений
void FFT_Inv2048Set7bit()
Устанавливает 7-битную точность вычислений
void FFT_Inv2048(nm32sc *GSrcBuffer, nm32sc *LDstBuffer, void *LBuffer, void *GBuffer, int ShiftR1=11, int ShiftR2=-1)
Обратное быстрое преобразование Фурье. ОБПФ-2048.
void FFT_Inv2048Set6bit()
Устанавливает 6-битную точность вычислений
void FFT_Inv256Set6bit()
Устанавливает 6-битную точность вычислений
void FFT_Inv256(nm32sc *GSrcBuffer, nm32sc *GDstBuffer, void *LBuffer, void *GBuffer, int ShiftR1=8, int ShiftR2=-1)
Обратное быстрое преобразование Фурье. ОБПФ-256.
void FFT_Inv256Set7bit()
Устанавливает 7-битную точность вычислений
void FFT_Inv4096(nm32sc *GSrcBuffer, nm32sc *GDstBuffer, void *LBuffer, void *GBuffer)
Обратное быстрое преобразование Фурье. ОБПФ-4096.
void FFT_Inv512Set6bit()
Устанавливает 6-битную точность вычислений
void FFT_Inv512(nm32sc *GSrcBuffer, nm32sc *LDstBuffer, void *LBuffer, void *GBuffer, int ShiftR1=9, int ShiftR2=-1)
Обратное быстрое преобразование Фурье. ОБПФ-512.
void FFT_Inv512Set7bit()
Устанавливает 7-битную точность вычислений
void FFT_Inv8192(nm32sc *LSrcBuffer, nm32sc *GDstBuffer, void *LBuffer, void *GBuffer)
Обратное быстрое преобразование Фурье. ОБПФ-8192.
Definition: nmtype.h:1301