nmpp
vArithmetics.h
1//------------------------------------------------------------------------
2//
3// $Workfile:: vArithmetics.h $
4//
5// Векторно-матричная библиотека
6//
7// Copyright (c) RC Module Inc.
8//
9// $Revision: 1.2 $ $Date: 2005/06/23 15:15:15 $
10//
19//------------------------------------------------------------------------
20
21#ifndef _VARITHM_H_INCLUDED_
22#define _VARITHM_H_INCLUDED_
23
24#include "malloc32.h"
25
26#ifdef __cplusplus
27 extern "C" {
28#endif
29
90void nmppsAbs_4s (const nm4s* pSrcVec, nm4s* pDstVec, int nSize);
91void nmppsAbs_8s (const nm8s* pSrcVec, nm8s* pDstVec, int nSize);
92void nmppsAbs_16s(const nm16s* pSrcVec, nm16s* pDstVec, int nSize);
93void nmppsAbs_32s(const nm32s* pSrcVec, nm32s* pDstVec, int nSize);
94void nmppsAbs_64s(const nm64s* pSrcVec, nm64s* pDstVec, int nSize);
96
97
98
147void nmppsAbs1_4s (const nm4s* pSrcVec, nm4s* pDstVec, int nSize);
148void nmppsAbs1_8s (const nm8s* pSrcVec, nm8s* pDstVec, int nSize);
149void nmppsAbs1_16s(const nm16s* pSrcVec, nm16s* pDstVec, int nSize);
150void nmppsAbs1_32s(const nm32s* pSrcVec, nm32s* pDstVec, int nSize);
151void nmppsAbs1_64s(const nm64s* pSrcVec, nm64s* pDstVec, int nSize);
153
154
155//*****************************************************************************
156
203void nmppsNeg_8s (const nm8s* pSrcVec, nm8s* pDstVec, int nSize);
204void nmppsNeg_16s(const nm16s* pSrcVec, nm16s* pDstVec, int nSize);
205void nmppsNeg_32s(const nm32s* pSrcVec, nm32s* pDstVec, int nSize);
206void nmppsNeg_64s(const nm64s* pSrcVec, nm64s* pDstVec, int nSize);
208
209//*****************************************************************************
210
264void nmppsAddC_8s (const nm8s* pSrcVec, int8b nVal, nm8s* pDstVec, int nSize);
265void nmppsAddC_16s (const nm16s* pSrcVec, int16b nVal, nm16s* pDstVec, int nSize);
266void nmppsAddC_32s (const nm32s* pSrcVec, int32b nVal, nm32s* pDstVec, int nSize);
267void nmppsAddC_64s (const nm64s* pSrcVec, int64b nVal, nm64s* pDstVec, int nSize);
269
270//*****************************************************************************
271
// Variant of the 64-bit AddC declaration that receives the constant through a
// pointer (pnVal) instead of by value as nmppsAddC_64s does.
// NOTE(review): pnVal is not const-qualified; whether the callee writes through
// it cannot be determined from this header — confirm against the implementation.
325void nmppsAddC_p64s(const nm64s* pSrcVec, int64b* pnVal, nm64s* pDstVec, int nSize);
327
328 //*****************************************************************************
329
// NOTE: argument order differs from the integer nmppsAddC_8s/16s/32s/64s
// declarations, which take (pSrcVec, nVal, pDstVec, nSize). Here the
// destination precedes the constant: (pSrcVec, pDstVec, nVal, nSize).
// The constant is a plain float although the vectors are nm32fcr.
383 void nmppsAddC_32fcr(const nm32fcr *pSrcVec, nm32fcr *pDstVec, float nVal, int nSize);
385
386//*****************************************************************************
387
441void nmppsAdd_4s (const nm4s* pSrcVec1, const nm4s* pSrcVec2, nm4s* pDstVec, int nSize);
442void nmppsAdd_8s (const nm8s* pSrcVec1, const nm8s* pSrcVec2, nm8s* pDstVec, int nSize);
443void nmppsAdd_16s(const nm16s* pSrcVec1, const nm16s* pSrcVec2, nm16s* pDstVec, int nSize);
444void nmppsAdd_32s(const nm32s* pSrcVec1, const nm32s* pSrcVec2, nm32s* pDstVec, int nSize);
445void nmppsAdd_64s(const nm64s* pSrcVec1, const nm64s* pSrcVec2, nm64s* pDstVec, int nSize);
447
501void nmppsAdd_32f(const nm32f* pSrcVec1, const nm32f* pSrcVec2, nm32f* pDstVec, int nSize);
503
// NOTE: argument order differs from the integer nmppsAddC_8s/16s/32s/64s
// declarations, which take (pSrcVec, nVal, pDstVec, nSize). Here the
// destination precedes the constant: (pSrcVec, pDstVec, C, nSize).
557void nmppsAddC_32f (const nm32f* pSrcVec, nm32f* pDstVec, float C, int nSize);
559
560void nmppsAddEx_64s (const nm64s *pSrcVec1, int srcStep1, const nm64s *pSrcVec2, int srcStep2, nm64s *pDstVec, int dstStep, int nSize );
561
// NOTE(review): the per-parameter notes below mention nm8s* buffers and 8-bit
// elements, but the declared types are nm16s** / nm16s*. The notes may have been
// carried over from an 8-bit variant — TODO confirm the intended element type
// and the unit of nSize.
562void nmppsAdd4V_16s(
563 nm16s** Vectors, // array of pointers to buffers, declared nm16s** (original note: nm8s* Any [NumberOfBuffer])
564 nm16s* pDstVec, // result buffer (original note: long Local [VecSize/4])
565 int nSize // buffer size (original note: in 8-bit elements, nSize = [256,512,..]) - TODO confirm unit
566 );
567
568
569//*****************************************************************************
570
632void nmppsAdd_AddC_32s(nm32s* pSrcVec1, nm32s* pSrcVec2, int nVal, nm32s* pDstVec, int nSize);
634
635//*****************************************************************************
636
690void nmppsSubC_4s (const nm4s* pSrcVec, int4b nVal, nm4s* pDstVec, int nSize);
691void nmppsSubC_8s (const nm8s* pSrcVec, int8b nVal, nm8s* pDstVec, int nSize);
692void nmppsSubC_16s(const nm16s* pSrcVec, int16b nVal, nm16s* pDstVec, int nSize);
693void nmppsSubC_32s(const nm32s* pSrcVec, int32b nVal, nm32s* pDstVec, int nSize);
694void nmppsSubC_64s(const nm64s* pSrcVec, int64b nVal, nm64s* pDstVec, int nSize);
696
750void nmppsSubC_32f (const nm32f* pSrcVec, nm32f* pDstVec, float C, int nSize);
752
753//*****************************************************************************
754
808void nmppsSubCRev_8s (const nm8s* pSrcVec, int8b nVal, nm8s* pDstVec, int nSize);
809void nmppsSubCRev_16s(const nm16s* pSrcVec, int16b nVal, nm16s* pDstVec, int nSize);
810void nmppsSubCRev_32s(const nm32s* pSrcVec, int32b nVal, nm32s* pDstVec, int nSize);
811void nmppsSubCRev_64s(const nm64s* pSrcVec, int64b nVal, nm64s* pDstVec, int nSize);
812//void nmppsSubCRev_64s(nm64s* pSrcVec, int64b* pnVal, nm64s* pDstVec, int nSize);
814
815//*****************************************************************************
816
870void nmppsSubCRev_32f (const nm32f* pSrcVec, nm32f* pDstVec, float C, int nSize);
872
// Integer Sub declarations for 4-, 8-, 16-, 32- and 64-bit signed vector types.
// NOTE(review): unlike the nmppsAdd_4s..64s and nmppsSub_32f declarations,
// pSrcVec2 is not const-qualified here, so a caller holding a const second
// operand must cast. Presumably an oversight rather than a sign that pSrcVec2
// is written to — confirm against the implementation before changing it.
926void nmppsSub_4s (const nm4s* pSrcVec1, nm4s* pSrcVec2, nm4s* pDstVec, int nSize);
927void nmppsSub_8s (const nm8s* pSrcVec1, nm8s* pSrcVec2, nm8s* pDstVec, int nSize);
928void nmppsSub_16s(const nm16s* pSrcVec1, nm16s* pSrcVec2, nm16s* pDstVec, int nSize);
929void nmppsSub_32s(const nm32s* pSrcVec1, nm32s* pSrcVec2, nm32s* pDstVec, int nSize);
930void nmppsSub_64s(const nm64s* pSrcVec1, nm64s* pSrcVec2, nm64s* pDstVec, int nSize);
932
986void nmppsSub_32f(const nm32f* pSrcVec1, const nm32f* pSrcVec2, nm32f* pDstVec, int nSize);
988//*****************************************************************************
989
// Integer AbsDiff declarations for 8-, 16-, 32- and 64-bit signed vector types
// (no 4-bit variant is declared here).
// NOTE(review): pSrcVec1 is const but pSrcVec2 is not; presumably both are
// read-only inputs — confirm against the implementation.
1054void nmppsAbsDiff_8s (const nm8s* pSrcVec1, nm8s* pSrcVec2, nm8s* pDstVec, int nSize);
1055void nmppsAbsDiff_16s(const nm16s* pSrcVec1, nm16s* pSrcVec2, nm16s* pDstVec, int nSize);
1056void nmppsAbsDiff_32s(const nm32s* pSrcVec1, nm32s* pSrcVec2, nm32s* pDstVec, int nSize);
1057void nmppsAbsDiff_64s(const nm64s* pSrcVec1, nm64s* pSrcVec2, nm64s* pDstVec, int nSize);
1059
1060
1115void nmppsAbsDiff_32f(const nm32f* pSrcVec1, nm32f* pSrcVec2, nm32f* pDstVec, int nSize);
1117//*****************************************************************************
1118
1186void nmppsAbsDiff1_8s(nm8s* pSrcVec1, nm8s* pSrcVec2, nm8s* pDstVec, int nSize);
1188
1189//*****************************************************************************
1190
1243void nmppsMulC_8s (const nm8s* pSrcVec, int8b nVal, nm8s* pDstVec, int nSize);
1244void nmppsMulC_8s16s (const nm8s* pSrcVec, int16b nVal, nm16s* pDstVec, int nSize);
1245void nmppsMulC_16s (const nm16s* pSrcVec, int16b nVal, nm16s* pDstVec, int nSize);
1246void nmppsMulC_16s32s(const nm16s* pSrcVec, int32b nVal, nm32s* pDstVec, int nSize);
1247void nmppsMulC_32s (const nm32s* pSrcVec, int32b nVal, nm32s* pDstVec, int nSize);
1248void nmppsMulC_32s64s(const nm32s* pSrcVec, int64b nVal, nm64s* pDstVec, int nSize);
1249void nmppsMulC_64s (const nm64s* pSrcVec, int64b nVal, nm64s* pDstVec, int nSize);
1250
1251void nmppsMulC_2s16s (const nm2s* pSrcVec, int16b nVal, nm16s* pDstVec, int nSize);
1253
1254
1298void nmppsMulC_32f(const nm32f* pSrcVec, nm32f* pDstVec, float C, int nSize);
1300
1301
1356void nmppsMul_Mul_Add_32f(const nm32f* pSrcVec1, const nm32f* pSrcVec2, const nm32f* pSrcVec3, const nm32f* pSrcVec4, nm32f* pDstVec, int nSize);
1357void nmppsMul_Mul_Add_32fcr(const nm32fcr *pSrcVec1, const nm32fcr *pSrcVec2, const nm32fcr *pSrcVec3, const nm32fcr *pSrcVec4, nm32fcr *pDstVec, int nSize);
1358void nmppsMul_Mul_Add_64f(const nm64f *pSrcVec1, const nm64f *pSrcVec2, const nm64f *pSrcVec3, const nm64f *pSrcVec4, nm64f *pDstVec, int nSize);
1360
1409void nmppsMul_Add_32f(const nm32f* pSrcVec1, const nm32f* pSrcVec2, const nm32f* pSrcVecAdd, nm32f* pDstVec, int nSize);
1410void nmppsMul_Add_64f(const nm64f* pSrcVec1, const nm64f* pSrcVec2, const nm64f* pSrcVecAdd, nm64f* pDstVec, int nSize);
1411void nmppsMul_Add_32fcr(const nm32fcr* pSrcVec1, const nm32fcr* pSrcVec2, const nm32fcr* pSrcVecAdd, nm32fcr* pDstVec, int nSize);
1413
1468void nmppsMul_ConjMul_Add_32fcr(const nm32fcr* pSrcVec1, const nm32fcr* pSrcVec2, const nm32fcr* pSrcVec3, const nm32fcr* pSrcVec4, nm32fcr* pDstVec, int nSize);
1470
1519void nmppsMulC_AddC_32f(const nm32f* pSrcVec, float nMulC, float nAddC, nm32f* pDstVec, int nSize);
1521
1576void nmppsMulC_AddV_AddC_32f(nm32f* pSrcVec, float nMulC, nm32f* pVecAdd, float nAddC, nm32f* pDstVec, int nSize);
1578
1621void nmppsMul_32f(const nm32f* pSrcVec1, const nm32f* pSrcVec2, nm32f* pDstVec, int nSize);
1622void nmppsMul_32fcr(const nm32fcr* pSrcVec1, const nm32fcr* pSrcVec2, nm32fcr* pDstVec, int nSize);
1624
1667void nmppsConjMul_32fcr(const nm32fcr *pSrcVec1, const nm32fcr *pSrcVec2, nm32fcr *pDstVec, int nSize);
1669
1724void nmppsMul_Mul_Sub_32f(const nm32f* pSrcVec1, const nm32f* pSrcVec2, const nm32f* pSrcVec3, const nm32f* pSrcVec4, nm32f* pDstVec, int nSize);
1726
1771void nmppsMulC_AddV_32f(const nm32f* pSrcVec1, const nm32f* pSrcVec2, nm32f* pDstVec, float C, int nSize);
1773//*******************************************************************
1774
1775
1776void nmppsMulC_AddC_2s16s(const nm2s* pSrcVec, int32b nMulC, int nAddC, nm16s* pDstVec, int nSize);
1777
1778//*****************************************************************************
1779
1838void nmppsMul_AddC_64s(const nm64s* pSrcVec1,const nm64s* pSrcVec2, const nm64s* pnVal, nm64s* pDstVec, int nSize);
1840
1841//*****************************************************************************
1842
1901void nmppsMul_AddC_32f(const nm32f* pSrcVec1, const nm32f* pSrcVec2, float nValueAddC, nm32f* pDstVec, int nSize);
1903//*****************************************************************************
1904
1963void nmppsMulC_AddC_32s(const nm32s* pSrcVec, int nMulVal, int nAddVal, nm32s* pDstVec, int nSize);
1965
1966
1967
1968
1993void nmppsMulC_AddC_2x32s(int32x2* dataSparseSrc, int32x2* mulArg, int32x2* addArg, int32x2 *dataSparseDst, int size, int stepSparseSrc, int stepSparseDst);
1995
1996
2023void nmppsRShiftC_MulC_AddC_2x32s(int32x2* dataSparseSrc, int32x2* preshiftArg, int32x2* mulArg, int32x2* addArg, int32x2 *dataSparseDst, int size, int stepSparseSrc, int stepSparseDst);
2025
2026
2027//*****************************************************************************
2028
2093void nmppsMulC_AddV_AddC_32s(nm32s* pSrcVec1, int nMulVal, nm32s* pSrcVec2, int nAddVal, nm32s* pDstVec, int nSize);
2095
2096//*****************************************************************************
2097
2155void nmppsSumN_8s16s(nm8s ** ppSrcVec, nm16s* pDstVec, int nSize, int nNumberOfVectors);
2156void nmppsSumN_16s (nm16s ** ppSrcVec, nm16s* pDstVec, int nSize, int nNumberOfVectors);
2158
2159
// NOTE(review): the per-parameter notes below mention nm8s* buffers and 8-bit
// elements, but the declared types are nm16s** / nm16s*. The notes may have been
// carried over from an 8-bit variant — TODO confirm the intended element type
// and the unit of nSize.
2160void nmppsSum4_16s(
2161 nm16s** Vectors, // array of pointers to buffers, declared nm16s** (original note: nm8s* Any [NumberOfBuffer])
2162 nm16s* pDstVec, // result buffer (original note: long Local [VecSize/4])
2163 int nSize // buffer size (original note: in 8-bit elements, nSize = [256,512,..]) - TODO confirm unit
2164 );
2165
2166//*****************************************************************************
2167
2256void nmppsDivC_32s(nm32s* pSrcVec, int nDivisor, nm32s* pDstVec, int nSize, void* pTmpBuf1, void* pTmpBuf2);
2258
2259//*****************************************************************************
2260
// Sum declarations for 8-, 16-, 32- and 64-bit signed vectors. All return void
// and take a non-const pnRes pointer, presumably the output location.
// Note the differing result width: the 8-bit variant takes int32b*, while the
// 16-, 32- and 64-bit variants take int64b*.
2308void nmppsSum_8s (const nm8s* pSrcVec, int nSize, int32b *pnRes);
2309void nmppsSum_16s(const nm16s* pSrcVec, int nSize, int64b *pnRes);
2310void nmppsSum_32s(const nm32s* pSrcVec, int nSize, int64b *pnRes);
2311void nmppsSum_64s(const nm64s* pSrcVec, int nSize, int64b *pnRes);
2313
2314//*****************************************************************************
2315
2365void nmppsSum_1 (const nm1* pSrcVec, int nSize, int32b* pnRes, void* pTmpBuf);
2367
2422
2423
2424
2425/*
2426void nmppsDotProd_8s8sm (nm8s* pSrcVec1, nm8s* pSrcVec2, int nSize, int64b* pnRes, SpecTmp1* spec);
2427void nmppsDotProd_8s16sm(nm8s* pSrcVec1, nm16s* pSrcVec2, int nSize, int64b* pnRes, SpecTmp1* spec);
2428void nmppsDotProd_8s32sm(nm8s* pSrcVec1, nm32s* pSrcVec2, int nSize, int64b* pnRes, SpecTmp1* spec);
2429void nmppsDotProd_8s64s(nm8s* pSrcVec1, nm64s* pSrcVec2, int nSize, int64b* pnRes);
2430
2431void nmppsDotProd_16s16sm(nm16s* pSrcVec1, nm16s* pSrcVec2, int nSize, int64b* pnRes, SpecTmp1* spec);
2432void nmppsDotProd_16s32sm(nm16s* pSrcVec1, nm32s* pSrcVec2, int nSize, int64b* pnRes, SpecTmp1* spec);
2433void nmppsDotProd_16s64s(nm16s* pSrcVec1, nm64s* pSrcVec2, int nSize, int64b* pnRes);
2434
2435void nmppsDotProd_32s32sm(nm32s* pSrcVec1, nm32s* pSrcVec2, int nSize, int64b* pnRes, SpecTmp1* spec);
2436void nmppsDotProd_32s64s(nm32s* pSrcVec1, nm64s* pSrcVec2, int nSize, int64b* pnRes);
2437
2438void nmppsDotProd_64s64s(nm64s* pSrcVec1, nm64s* pSrcVec2, int nSize, int64b* pnRes);
2439*/
// DotProd declarations that take an extra nm64s* tmp argument in addition to
// the int64b* pnRes result pointer.
// These return int, whereas the nmppsDotProd_8s64s/16s64s/32s64s/64s64s
// declarations further down return void. The meaning of the int return value
// is not visible in this header — TODO document.
// The commented-out prototypes above use the same names but take a
// SpecTmp1* spec argument and return void.
2440int nmppsDotProd_8s8sm (const nm8s* pSrcVec1, const nm8s* pSrcVec2, int nSize, int64b* pnRes, nm64s* tmp);
2441int nmppsDotProd_8s16sm (const nm8s* pSrcVec1, const nm16s* pSrcVec2, int nSize, int64b* pnRes, nm64s* tmp);
2442int nmppsDotProd_8s32sm (const nm8s* pSrcVec1, const nm32s* pSrcVec2, int nSize, int64b* pnRes, nm64s* tmp);
2443int nmppsDotProd_16s16sm(const nm16s* pSrcVec1, const nm16s* pSrcVec2, int nSize, int64b* pnRes, nm64s* tmp);
2444int nmppsDotProd_16s32sm(const nm16s* pSrcVec1, const nm32s* pSrcVec2, int nSize, int64b* pnRes, nm64s* tmp);
2445int nmppsDotProd_32s32sm(const nm32s* pSrcVec1, const nm32s* pSrcVec2, int nSize, int64b* pnRes, nm64s* tmp);
2447
2500void nmppsDotProd_8s64s (const nm8s* pSrcVec1, const nm64s* pSrcVec2, int nSize, int64b* pnRes);
2501void nmppsDotProd_16s64s (const nm16s* pSrcVec1, const nm64s* pSrcVec2, int nSize, int64b* pnRes);
2502void nmppsDotProd_32s64s (const nm32s* pSrcVec1, const nm64s* pSrcVec2, int nSize, int64b* pnRes);
2503void nmppsDotProd_64s64s (const nm64s* pSrcVec1, const nm64s* pSrcVec2, int nSize, int64b* pnRes);
2505
2511//void nmppsDotProd_16sc(nm16sc *pSrcVec1, nm64sc *pSrcVec2, int nSize, nm64sc *pnRes);//pc version is not available
2512void nmppsDotProd_64sc(nm64sc *pSrcVec1, nm64sc *pSrcVec2, int nSize, nm64sc *pnRes);
2514
2515//*****************************************************************************
2573
// WeightedSum declarations: two source vectors, each followed by a scalar
// weight (nW1, nW2), and a destination whose element type is wider than the
// sources (8s->16s, 16s->32s, 32s->64s).
// The weights are int for the 8s16s and 16s32s variants but nm64s for 32s64s.
// NOTE(review): the source pointers are not const-qualified; presumably they
// are read-only — confirm against the implementation.
2574void nmppsWeightedSum_8s16s(nm8s* pSrcVec1,int nW1,nm8s* pSrcVec2,int nW2, nm16s* pDstVec, int nSize);
2575void nmppsWeightedSum_16s32s(nm16s* pSrcVec1,int nW1,nm16s* pSrcVec2,int nW2, nm32s* pDstVec, int nSize);
2576void nmppsWeightedSum_32s64s(nm32s* pSrcVec1,nm64s nW1,nm32s* pSrcVec2,nm64s nW2, nm64s* pDstVec, int nSize);
2578
2579void nmppsMulC_Add_32fcr(const nm32fcr* pSrcVec1, const nm32fcr* pSrcVec2, nm32fcr* pDstVec, float C, int nSize);
2580
2581#ifdef __cplusplus
2582 };
2583#endif
2584
2585#endif // _VARITHM_H_INCLUDED_
void nmppsMulC_AddC_2x32s(int32x2 *dataSparseSrc, int32x2 *mulArg, int32x2 *addArg, int32x2 *dataSparseDst, int size, int stepSparseSrc, int stepSparseDst)
Sparse vector by constant multiplication with addition of constant.
int int4b
Definition: nmtype.h:592
int int8b
Definition: nmtype.h:618
int int32b
Definition: nmtype.h:683
int int16b
Definition: nmtype.h:644
INT64 int64b
Definition: nmtype.h:709
int nm32s
Definition: nmtype.h:292
long long nm64s
Definition: nmtype.h:375
void nm2s
Definition: nmtype.h:116
int nm1
Definition: nmtype.h:100
short nm16s
Definition: nmtype.h:243
void nm4s
Definition: nmtype.h:129
char nm8s
Definition: nmtype.h:167
Definition: nmtype.h:293
Definition: nmtype.h:1335
Definition: nmtype.h:1364