00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 15. July 2011 00005 * $Revision: V1.0.10 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_cfft_radix4_q15.c 00009 * 00010 * Description: This file has function definition of Radix-4 FFT & IFFT function and 00011 * In-place bit reversal using bit reversal table 00012 * 00013 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00014 * 00015 * Version 1.0.10 2011/7/15 00016 * Big Endian support added and Merged M0 and M3/M4 Source code. 00017 * 00018 * Version 1.0.3 2010/11/29 00019 * Re-organized the CMSIS folders and updated documentation. 00020 * 00021 * Version 1.0.2 2010/11/11 00022 * Documentation updated. 00023 * 00024 * Version 1.0.1 2010/10/05 00025 * Production release and review comments incorporated. 00026 * 00027 * Version 1.0.0 2010/09/20 00028 * Production release and review comments incorporated. 00029 * 00030 * Version 0.0.5 2010/04/26 00031 * incorporated review comments and updated with latest CMSIS layer 00032 * 00033 * Version 0.0.3 2010/03/10 00034 * Initial version 00035 * -------------------------------------------------------------------- */ 00036 00037 #include "arm_math.h" 00038 00066 void arm_cfft_radix4_q15( 00067 const arm_cfft_radix4_instance_q15 * S, 00068 q15_t * pSrc) 00069 { 00070 if(S->ifftFlag == 1u) 00071 { 00072 /* Complex IFFT radix-4 */ 00073 arm_radix4_butterfly_inverse_q15(pSrc, S->fftLen, S->pTwiddle, 00074 S->twidCoefModifier); 00075 } 00076 else 00077 { 00078 /* Complex FFT radix-4 */ 00079 arm_radix4_butterfly_q15(pSrc, S->fftLen, S->pTwiddle, 00080 S->twidCoefModifier); 00081 } 00082 00083 if(S->bitReverseFlag == 1u) 00084 { 00085 /* Bit Reversal */ 00086 arm_bitreversal_q15(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable); 00087 } 00088 00089 } 00090 00095 /* 00096 * Radix-4 FFT algorithm used is : 00097 * 00098 * Input real and imaginary data: 00099 * x(n) = xa + j * ya 00100 * x(n+N/4 ) = xb + j * yb 00101 * x(n+N/2 ) = xc + j * yc 00102 * x(n+3N 4) = xd + j * yd 00103 * 00104 * 00105 * Output real and imaginary data: 00106 * x(4r) = xa'+ j * ya' 00107 * x(4r+1) = xb'+ j * yb' 00108 * x(4r+2) = xc'+ j * yc' 00109 * x(4r+3) = xd'+ j * yd' 00110 * 00111 * 00112 * Twiddle factors for radix-4 FFT: 00113 * Wn = co1 + j * (- si1) 00114 * W2n = co2 + j * (- si2) 00115 * W3n = co3 + j * (- si3) 00116 00117 * The real and imaginary output values for the radix-4 butterfly are 00118 * xa' = xa + xb + xc + xd 00119 * ya' = ya + yb + yc + yd 00120 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) 00121 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) 00122 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) 00123 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) 00124 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) 00125 * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) 00126 * 00127 */ 00128 00138 void arm_radix4_butterfly_q15( 00139 q15_t * pSrc16, 00140 uint32_t fftLen, 00141 q15_t * pCoef16, 00142 uint32_t twidCoefModifier) 00143 { 00144 00145 #ifndef ARM_MATH_CM0 00146 00147 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00148 00149 q31_t R, S, T, U; 00150 q31_t C1, C2, C3, out1, out2; 00151 q31_t *pSrc, *pCoeff; 00152 uint32_t n1, n2, ic, i0, i1, i2, i3, j, k; 00153 q15_t in; 00154 00155 /* Total process is divided into three stages */ 00156 00157 /* process first stage, middle stages, & last stage */ 00158 00159 /* pointer initializations for SIMD calculations */ 00160 pSrc = (q31_t *) pSrc16; 00161 pCoeff = (q31_t *) pCoef16; 00162 00163 /* Initializations for the first stage */ 00164 n2 = fftLen; 00165 n1 = n2; 00166 00167 /* n2 = fftLen/4 */ 00168 n2 >>= 2u; 00169 00170 /* Index for twiddle coefficient */ 00171 ic = 0u; 00172 00173 /* Index for input read and output write */ 00174 i0 = 0u; 00175 j = n2; 00176 00177 /* Input is in 1.15(q15) format */ 00178 00179 /* start of first stage process */ 00180 do 00181 { 00182 /* Butterfly implementation */ 00183 00184 /* index calculation for the input as, */ 00185 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */ 00186 i1 = i0 + n2; 00187 i2 = i1 + n2; 00188 i3 = i2 + n2; 00189 00190 /* Reading i0, i0+fftLen/2 inputs */ 00191 /* Read ya (real), xa(imag) input */ 00192 T = pSrc[i0]; 00193 in = ((int16_t) (T & 0xFFFF)) >> 2; 00194 T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF); 00195 /* Read yc (real), xc(imag) input */ 00196 S = pSrc[i2]; 00197 in = ((int16_t) (S & 0xFFFF)) >> 2; 00198 S = ((S >> 2) & 0xFFFF0000) | (in & 0xFFFF); 00199 /* R = packed((ya + yc), (xa + xc) ) */ 00200 R = __QADD16(T, S); 00201 /* S = packed((ya - yc), (xa - xc) ) */ 00202 S = __QSUB16(T, S); 00203 00204 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 00205 /* Read yb (real), xb(imag) input */ 00206 T = pSrc[i1]; 00207 in = ((int16_t) (T & 0xFFFF)) >> 2; 00208 T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF); 00209 /* Read yd (real), xd(imag) input */ 00210 U = pSrc[i3]; 00211 in = ((int16_t) (U & 0xFFFF)) >> 2; 00212 U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF); 00213 /* T = packed((yb + yd), (xb + xd) ) */ 00214 T = __QADD16(T, U); 00215 00216 /* writing the butterfly processed i0 sample */ 00217 /* xa' = xa + xb + xc + xd */ 00218 /* ya' = ya + yb + yc + yd */ 00219 pSrc[i0] = __SHADD16(R, T); 00220 00221 /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */ 00222 R = __QSUB16(R, T); 00223 00224 /* co2 & si2 are read from SIMD Coefficient pointer */ 00225 C2 = pCoeff[2u * ic]; 00226 00227 00228 #ifndef ARM_MATH_BIG_ENDIAN 00229 00230 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 00231 out1 = __SMUAD(C2, R) >> 16u; 00232 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00233 out2 = __SMUSDX(C2, R); 00234 00235 #else 00236 00237 /* xc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00238 out1 = __SMUSDX(R, C2) >> 16u; 00239 /* yc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 00240 out2 = __SMUAD(C2, R); 00241 00242 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00243 00244 /* Reading i0+fftLen/4 */ 00245 /* T = packed(yb, xb) */ 00246 T = pSrc[i1]; 00247 in = ((int16_t) (T & 0xFFFF)) >> 2; 00248 T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF); 00249 00250 /* writing the butterfly processed i0 + fftLen/4 sample */ 00251 /* writing output(xc', yc') in little endian format */ 00252 pSrc[i1] = (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 00253 00254 /* Butterfly calculations */ 00255 /* U = packed(yd, xd) */ 00256 U = pSrc[i3]; 00257 in = ((int16_t) (U & 0xFFFF)) >> 2; 00258 U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF); 00259 /* T = packed(yb-yd, xb-xd) */ 00260 T = __QSUB16(T, U); 00261 00262 00263 #ifndef ARM_MATH_BIG_ENDIAN 00264 00265 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 00266 R = __QASX(S, T); 00267 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ 00268 S = __QSAX(S, T); 00269 00270 #else 00271 00272 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 00273 R = __QSAX(S, T); 00274 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ 00275 S = __QASX(S, T); 00276 00277 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00278 00279 /* co1 & si1 are read from SIMD Coefficient pointer */ 00280 C1 = pCoeff[ic]; 00281 /* Butterfly process for the i0+fftLen/2 sample */ 00282 00283 #ifndef ARM_MATH_BIG_ENDIAN 00284 00285 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 00286 out1 = __SMUAD(C1, S) >> 16u; 00287 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 00288 out2 = __SMUSDX(C1, S); 00289 00290 #else 00291 00292 /* xb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 00293 out1 = __SMUSDX(S, C1) >> 16u; 00294 /* yb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 00295 out2 = __SMUAD(C1, S); 00296 00297 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00298 00299 /* writing output(xb', yb') in little endian format */ 00300 pSrc[i2] = ((out2) & 0xFFFF0000) | ((out1) & 0x0000FFFF); 00301 00302 00303 /* co3 & si3 are read from SIMD Coefficient pointer */ 00304 C3 = pCoeff[3u * ic]; 00305 /* Butterfly process for the i0+3fftLen/4 sample */ 00306 00307 #ifndef ARM_MATH_BIG_ENDIAN 00308 00309 /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ 00310 out1 = __SMUAD(C3, R) >> 16u; 00311 /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ 00312 out2 = __SMUSDX(C3, R); 00313 00314 #else 00315 00316 /* xd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ 00317 out1 = __SMUSDX(R, C3) >> 16u; 00318 /* yd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ 00319 out2 = __SMUAD(C3, R); 00320 00321 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00322 00323 /* writing output(xd', yd') in little endian format */ 00324 pSrc[i3] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 00325 00326 /* Twiddle coefficients index modifier */ 00327 ic = ic + twidCoefModifier; 00328 00329 /* Updating input index */ 00330 i0 = i0 + 1u; 00331 00332 } while(--j); 00333 /* data is in 4.11(q11) format */ 00334 00335 /* end of first stage process */ 00336 00337 00338 /* start of middle stage process */ 00339 00340 /* Twiddle coefficients index modifier */ 00341 twidCoefModifier <<= 2u; 00342 00343 /* Calculation of Middle stage */ 00344 for (k = fftLen / 4u; k > 4u; k >>= 2u) 00345 { 00346 /* Initializations for the middle stage */ 00347 n1 = n2; 00348 n2 >>= 2u; 00349 ic = 0u; 00350 00351 for (j = 0u; j <= (n2 - 1u); j++) 00352 { 00353 /* index calculation for the coefficients */ 00354 C1 = pCoeff[ic]; 00355 C2 = pCoeff[2u * ic]; 00356 C3 = pCoeff[3u * ic]; 00357 00358 /* Twiddle coefficients index modifier */ 00359 ic = ic + twidCoefModifier; 00360 00361 /* Butterfly implementation */ 00362 for (i0 = j; i0 < fftLen; i0 += n1) 00363 { 00364 /* index calculation for the input as, */ 00365 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */ 00366 i1 = i0 + n2; 00367 i2 = i1 + n2; 00368 i3 = i2 + n2; 00369 00370 /* Reading i0, i0+fftLen/2 inputs */ 00371 /* Read ya (real), xa(imag) input */ 00372 T = pSrc[i0]; 00373 00374 /* Read yc (real), xc(imag) input */ 00375 S = pSrc[i2]; 00376 00377 /* R = packed( (ya + yc), (xa + xc)) */ 00378 R = __QADD16(T, S); 00379 00380 /* S = packed((ya - yc), (xa - xc)) */ 00381 S = __QSUB16(T, S); 00382 00383 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 00384 /* Read yb (real), xb(imag) input */ 00385 T = pSrc[i1]; 00386 00387 /* Read yd (real), xd(imag) input */ 00388 U = pSrc[i3]; 00389 00390 00391 /* T = packed( (yb + yd), (xb + xd)) */ 00392 T = __QADD16(T, U); 00393 00394 00395 /* writing the butterfly processed i0 sample */ 00396 00397 /* xa' = xa + xb + xc + xd */ 00398 /* ya' = ya + yb + yc + yd */ 00399 out1 = __SHADD16(R, T); 00400 in = ((int16_t) (out1 & 0xFFFF)) >> 1; 00401 out1 = ((out1 >> 1) & 0xFFFF0000) | (in & 0xFFFF); 00402 pSrc[i0] = out1; 00403 00404 /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */ 00405 R = __SHSUB16(R, T); 00406 00407 00408 #ifndef ARM_MATH_BIG_ENDIAN 00409 00410 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */ 00411 out1 = __SMUAD(C2, R) >> 16u; 00412 00413 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00414 out2 = __SMUSDX(C2, R); 00415 00416 #else 00417 00418 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00419 out1 = __SMUSDX(R, C2) >> 16u; 00420 00421 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */ 00422 out2 = __SMUAD(C2, R); 00423 00424 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00425 00426 /* Reading i0+3fftLen/4 */ 00427 /* Read yb (real), xb(imag) input */ 00428 T = pSrc[i1]; 00429 00430 /* writing the butterfly processed i0 + fftLen/4 sample */ 00431 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 00432 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00433 pSrc[i1] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 00434 00435 /* Butterfly calculations */ 00436 00437 /* Read yd (real), xd(imag) input */ 00438 U = pSrc[i3]; 00439 00440 /* T = packed(yb-yd, xb-xd) */ 00441 T = __QSUB16(T, U); 00442 00443 00444 #ifndef ARM_MATH_BIG_ENDIAN 00445 00446 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 00447 R = __SHASX(S, T); 00448 00449 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ 00450 S = __SHSAX(S, T); 00451 00452 00453 /* Butterfly process for the i0+fftLen/2 sample */ 00454 out1 = __SMUAD(C1, S) >> 16u; 00455 out2 = __SMUSDX(C1, S); 00456 00457 #else 00458 00459 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 00460 R = __SHSAX(S, T); 00461 00462 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ 00463 S = __SHASX(S, T); 00464 00465 00466 /* Butterfly process for the i0+fftLen/2 sample */ 00467 out1 = __SMUSDX(S, C1) >> 16u; 00468 out2 = __SMUAD(C1, S); 00469 00470 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00471 00472 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 00473 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 00474 pSrc[i2] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 00475 00476 /* Butterfly process for the i0+3fftLen/4 sample */ 00477 00478 #ifndef ARM_MATH_BIG_ENDIAN 00479 00480 out1 = __SMUAD(C3, R) >> 16u; 00481 out2 = __SMUSDX(C3, R); 00482 00483 #else 00484 00485 out1 = __SMUSDX(R, C3) >> 16u; 00486 out2 = __SMUAD(C3, R); 00487 00488 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00489 00490 /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ 00491 /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ 00492 pSrc[i3] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 00493 } 00494 } 00495 /* Twiddle coefficients index modifier */ 00496 twidCoefModifier <<= 2u; 00497 } 00498 /* end of middle stage process */ 00499 00500 00501 /* data is in 10.6(q6) format for the 1024 point */ 00502 /* data is in 8.8(q8) format for the 256 point */ 00503 /* data is in 6.10(q10) format for the 64 point */ 00504 /* data is in 4.12(q12) format for the 16 point */ 00505 00506 /* Initializations for the last stage */ 00507 n1 = n2; 00508 n2 >>= 2u; 00509 00510 /* start of last stage process */ 00511 00512 /* Butterfly implementation */ 00513 for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1) 00514 { 00515 /* index calculation for the input as, */ 00516 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */ 00517 i1 = i0 + n2; 00518 i2 = i1 + n2; 00519 i3 = i2 + n2; 00520 00521 /* Reading i0, i0+fftLen/2 inputs */ 00522 /* Read ya (real), xa(imag) input */ 00523 T = pSrc[i0]; 00524 /* Read yc (real), xc(imag) input */ 00525 S = pSrc[i2]; 00526 00527 /* R = packed((ya + yc), (xa + xc)) */ 00528 R = __QADD16(T, S); 00529 /* S = packed((ya - yc), (xa - xc)) */ 00530 S = __QSUB16(T, S); 00531 00532 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 00533 /* Read yb (real), xb(imag) input */ 00534 T = pSrc[i1]; 00535 /* Read yd (real), xd(imag) input */ 00536 U = pSrc[i3]; 00537 00538 /* T = packed((yb + yd), (xb + xd)) */ 00539 T = __QADD16(T, U); 00540 00541 /* writing the butterfly processed i0 sample */ 00542 /* xa' = xa + xb + xc + xd */ 00543 /* ya' = ya + yb + yc + yd */ 00544 pSrc[i0] = __SHADD16(R, T); 00545 00546 /* R = packed((ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */ 00547 R = __SHSUB16(R, T); 00548 00549 /* Read yb (real), xb(imag) input */ 00550 T = pSrc[i1]; 00551 00552 /* writing the butterfly processed i0 + fftLen/4 sample */ 00553 /* xc' = (xa-xb+xc-xd) */ 00554 /* yc' = (ya-yb+yc-yd) */ 00555 pSrc[i1] = R; 00556 00557 /* Read yd (real), xd(imag) input */ 00558 U = pSrc[i3]; 00559 /* T = packed( (yb - yd), (xb - xd)) */ 00560 T = __QSUB16(T, U); 00561 00562 00563 #ifndef ARM_MATH_BIG_ENDIAN 00564 00565 /* writing the butterfly processed i0 + fftLen/2 sample */ 00566 /* xb' = (xa+yb-xc-yd) */ 00567 /* yb' = (ya-xb-yc+xd) */ 00568 pSrc[i2] = __SHSAX(S, T); 00569 00570 /* writing the butterfly processed i0 + 3fftLen/4 sample */ 00571 /* xd' = (xa-yb-xc+yd) */ 00572 /* yd' = (ya+xb-yc-xd) */ 00573 pSrc[i3] = __SHASX(S, T); 00574 00575 #else 00576 00577 /* writing the butterfly processed i0 + fftLen/2 sample */ 00578 /* xb' = (xa+yb-xc-yd) */ 00579 /* yb' = (ya-xb-yc+xd) */ 00580 pSrc[i2] = __SHASX(S, T); 00581 00582 /* writing the butterfly processed i0 + 3fftLen/4 sample */ 00583 /* xd' = (xa-yb-xc+yd) */ 00584 /* yd' = (ya+xb-yc-xd) */ 00585 pSrc[i3] = __SHSAX(S, T); 00586 00587 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00588 00589 } 00590 00591 /* end of last stage process */ 00592 00593 /* output is in 11.5(q5) format for the 1024 point */ 00594 /* output is in 9.7(q7) format for the 256 point */ 00595 /* output is in 7.9(q9) format for the 64 point */ 00596 /* output is in 5.11(q11) format for the 16 point */ 00597 00598 00599 #else 00600 00601 /* Run the below code for Cortex-M0 */ 00602 00603 q15_t R0, R1, S0, S1, T0, T1, U0, U1; 00604 q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2; 00605 uint32_t n1, n2, ic, i0, i1, i2, i3, j, k; 00606 00607 /* Total process is divided into three stages */ 00608 00609 /* process first stage, middle stages, & last stage */ 00610 00611 /* Initializations for the first stage */ 00612 n2 = fftLen; 00613 n1 = n2; 00614 00615 /* n2 = fftLen/4 */ 00616 n2 >>= 2u; 00617 00618 /* Index for twiddle coefficient */ 00619 ic = 0u; 00620 00621 /* Index for input read and output write */ 00622 i0 = 0u; 00623 j = n2; 00624 00625 /* Input is in 1.15(q15) format */ 00626 00627 /* start of first stage process */ 00628 do 00629 { 00630 /* Butterfly implementation */ 00631 00632 /* index calculation for the input as, */ 00633 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ 00634 i1 = i0 + n2; 00635 i2 = i1 + n2; 00636 i3 = i2 + n2; 00637 00638 /* Reading i0, i0+fftLen/2 inputs */ 00639 00640 /* input is down scale by 4 to avoid overflow */ 00641 /* Read ya (real), xa(imag) input */ 00642 T0 = pSrc16[i0 * 2u] >> 2u; 00643 T1 = pSrc16[(i0 * 2u) + 1u] >> 2u; 00644 00645 /* input is down scale by 4 to avoid overflow */ 00646 /* Read yc (real), xc(imag) input */ 00647 S0 = pSrc16[i2 * 2u] >> 2u; 00648 S1 = pSrc16[(i2 * 2u) + 1u] >> 2u; 00649 00650 /* R0 = (ya + yc) */ 00651 R0 = __SSAT(T0 + S0, 16u); 00652 /* R1 = (xa + xc) */ 00653 R1 = __SSAT(T1 + S1, 16u); 00654 00655 /* S0 = (ya - yc) */ 00656 S0 = __SSAT(T0 - S0, 16); 00657 /* S1 = (xa - xc) */ 00658 S1 = __SSAT(T1 - S1, 16); 00659 00660 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 00661 /* input is down scale by 4 to avoid overflow */ 00662 /* Read yb (real), xb(imag) input */ 00663 T0 = pSrc16[i1 * 2u] >> 2u; 00664 T1 = pSrc16[(i1 * 2u) + 1u] >> 2u; 00665 00666 /* input is down scale by 4 to avoid overflow */ 00667 /* Read yd (real), xd(imag) input */ 00668 U0 = pSrc16[i3 * 2u] >> 2u; 00669 U1 = pSrc16[(i3 * 2u) + 1] >> 2u; 00670 00671 /* T0 = (yb + yd) */ 00672 T0 = __SSAT(T0 + U0, 16u); 00673 /* T1 = (xb + xd) */ 00674 T1 = __SSAT(T1 + U1, 16u); 00675 00676 /* writing the butterfly processed i0 sample */ 00677 /* ya' = ya + yb + yc + yd */ 00678 /* xa' = xa + xb + xc + xd */ 00679 pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u); 00680 pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u); 00681 00682 /* R0 = (ya + yc) - (yb + yd) */ 00683 /* R1 = (xa + xc) - (xb + xd) */ 00684 R0 = __SSAT(R0 - T0, 16u); 00685 R1 = __SSAT(R1 - T1, 16u); 00686 00687 /* co2 & si2 are read from Coefficient pointer */ 00688 Co2 = pCoef16[2u * ic * 2u]; 00689 Si2 = pCoef16[(2u * ic * 2u) + 1]; 00690 00691 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 00692 out1 = (short) ((Co2 * R0 + Si2 * R1) >> 16u); 00693 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00694 out2 = (short) ((-Si2 * R0 + Co2 * R1) >> 16u); 00695 00696 /* Reading i0+fftLen/4 */ 00697 /* input is down scale by 4 to avoid overflow */ 00698 /* T0 = yb, T1 = xb */ 00699 T0 = pSrc16[i1 * 2u] >> 2; 00700 T1 = pSrc16[(i1 * 2u) + 1] >> 2; 00701 00702 /* writing the butterfly processed i0 + fftLen/4 sample */ 00703 /* writing output(xc', yc') in little endian format */ 00704 pSrc16[i1 * 2u] = out1; 00705 pSrc16[(i1 * 2u) + 1] = out2; 00706 00707 /* Butterfly calculations */ 00708 /* input is down scale by 4 to avoid overflow */ 00709 /* U0 = yd, U1 = xd */ 00710 U0 = pSrc16[i3 * 2u] >> 2; 00711 U1 = pSrc16[(i3 * 2u) + 1] >> 2; 00712 /* T0 = yb-yd */ 00713 T0 = __SSAT(T0 - U0, 16); 00714 /* T1 = xb-xd */ 00715 T1 = __SSAT(T1 - U1, 16); 00716 00717 /* R1 = (ya-yc) + (xb- xd), R0 = (xa-xc) - (yb-yd)) */ 00718 R0 = (short) __SSAT((q31_t) (S0 - T1), 16); 00719 R1 = (short) __SSAT((q31_t) (S1 + T0), 16); 00720 00721 /* S1 = (ya-yc) - (xb- xd), S0 = (xa-xc) + (yb-yd)) */ 00722 S0 = (short) __SSAT(((q31_t) S0 + T1), 16u); 00723 S1 = (short) __SSAT(((q31_t) S1 - T0), 16u); 00724 00725 /* co1 & si1 are read from Coefficient pointer */ 00726 Co1 = pCoef16[ic * 2u]; 00727 Si1 = pCoef16[(ic * 2u) + 1]; 00728 /* Butterfly process for the i0+fftLen/2 sample */ 00729 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 00730 out1 = (short) ((Si1 * S1 + Co1 * S0) >> 16); 00731 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 00732 out2 = (short) ((-Si1 * S0 + Co1 * S1) >> 16); 00733 00734 /* writing output(xb', yb') in little endian format */ 00735 pSrc16[i2 * 2u] = out1; 00736 pSrc16[(i2 * 2u) + 1] = out2; 00737 00738 /* Co3 & si3 are read from Coefficient pointer */ 00739 Co3 = pCoef16[3u * (ic * 2u)]; 00740 Si3 = pCoef16[(3u * (ic * 2u)) + 1]; 00741 /* Butterfly process for the i0+3fftLen/4 sample */ 00742 /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */ 00743 out1 = (short) ((Si3 * R1 + Co3 * R0) >> 16u); 00744 /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */ 00745 out2 = (short) ((-Si3 * R0 + Co3 * R1) >> 16u); 00746 /* writing output(xd', yd') in little endian format */ 00747 pSrc16[i3 * 2u] = out1; 00748 pSrc16[(i3 * 2u) + 1] = out2; 00749 00750 /* Twiddle coefficients index modifier */ 00751 ic = ic + twidCoefModifier; 00752 00753 /* Updating input index */ 00754 i0 = i0 + 1u; 00755 00756 } while(--j); 00757 /* data is in 4.11(q11) format */ 00758 00759 /* end of first stage process */ 00760 00761 00762 /* start of middle stage process */ 00763 00764 /* Twiddle coefficients index modifier */ 00765 twidCoefModifier <<= 2u; 00766 00767 /* Calculation of Middle stage */ 00768 for (k = fftLen / 4u; k > 4u; k >>= 2u) 00769 { 00770 /* Initializations for the middle stage */ 00771 n1 = n2; 00772 n2 >>= 2u; 00773 ic = 0u; 00774 00775 for (j = 0u; j <= (n2 - 1u); j++) 00776 { 00777 /* index calculation for the coefficients */ 00778 Co1 = pCoef16[ic * 2u]; 00779 Si1 = pCoef16[(ic * 2u) + 1u]; 00780 Co2 = pCoef16[2u * (ic * 2u)]; 00781 Si2 = pCoef16[(2u * (ic * 2u)) + 1u]; 00782 Co3 = pCoef16[3u * (ic * 2u)]; 00783 Si3 = pCoef16[(3u * (ic * 2u)) + 1u]; 00784 00785 /* Twiddle coefficients index modifier */ 00786 ic = ic + twidCoefModifier; 00787 00788 /* Butterfly implementation */ 00789 for (i0 = j; i0 < fftLen; i0 += n1) 00790 { 00791 /* index calculation for the input as, */ 00792 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ 00793 i1 = i0 + n2; 00794 i2 = i1 + n2; 00795 i3 = i2 + n2; 00796 00797 /* Reading i0, i0+fftLen/2 inputs */ 00798 /* Read ya (real), xa(imag) input */ 00799 T0 = pSrc16[i0 * 2u]; 00800 T1 = pSrc16[(i0 * 2u) + 1u]; 00801 00802 /* Read yc (real), xc(imag) input */ 00803 S0 = pSrc16[i2 * 2u]; 00804 S1 = pSrc16[(i2 * 2u) + 1u]; 00805 00806 /* R0 = (ya + yc), R1 = (xa + xc) */ 00807 R0 = __SSAT(T0 + S0, 16); 00808 R1 = __SSAT(T1 + S1, 16); 00809 00810 /* S0 = (ya - yc), S1 =(xa - xc) */ 00811 S0 = __SSAT(T0 - S0, 16); 00812 S1 = __SSAT(T1 - S1, 16); 00813 00814 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 00815 /* Read yb (real), xb(imag) input */ 00816 T0 = pSrc16[i1 * 2u]; 00817 T1 = pSrc16[(i1 * 2u) + 1u]; 00818 00819 /* Read yd (real), xd(imag) input */ 00820 U0 = pSrc16[i3 * 2u]; 00821 U1 = pSrc16[(i3 * 2u) + 1u]; 00822 00823 00824 /* T0 = (yb + yd), T1 = (xb + xd) */ 00825 T0 = __SSAT(T0 + U0, 16); 00826 T1 = __SSAT(T1 + U1, 16); 00827 00828 /* writing the butterfly processed i0 sample */ 00829 00830 /* xa' = xa + xb + xc + xd */ 00831 /* ya' = ya + yb + yc + yd */ 00832 out1 = ((R0 >> 1u) + (T0 >> 1u)) >> 1u; 00833 out2 = ((R1 >> 1u) + (T1 >> 1u)) >> 1u; 00834 00835 pSrc16[i0 * 2u] = out1; 00836 pSrc16[(2u * i0) + 1u] = out2; 00837 00838 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */ 00839 R0 = (R0 >> 1u) - (T0 >> 1u); 00840 R1 = (R1 >> 1u) - (T1 >> 1u); 00841 00842 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */ 00843 out1 = (short) ((Co2 * R0 + Si2 * R1) >> 16u); 00844 00845 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00846 out2 = (short) ((-Si2 * R0 + Co2 * R1) >> 16u); 00847 00848 /* Reading i0+3fftLen/4 */ 00849 /* Read yb (real), xb(imag) input */ 00850 T0 = pSrc16[i1 * 2u]; 00851 T1 = pSrc16[(i1 * 2u) + 1u]; 00852 00853 /* writing the butterfly processed i0 + fftLen/4 sample */ 00854 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 00855 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00856 pSrc16[i1 * 2u] = out1; 00857 pSrc16[(i1 * 2u) + 1u] = out2; 00858 00859 /* Butterfly calculations */ 00860 00861 /* Read yd (real), xd(imag) input */ 00862 U0 = pSrc16[i3 * 2u]; 00863 U1 = pSrc16[(i3 * 2u) + 1u]; 00864 00865 /* T0 = yb-yd, T1 = xb-xd */ 00866 T0 = __SSAT(T0 - U0, 16); 00867 T1 = __SSAT(T1 - U1, 16); 00868 00869 /* R0 = (ya-yc) + (xb- xd), R1 = (xa-xc) - (yb-yd)) */ 00870 R0 = (S0 >> 1u) - (T1 >> 1u); 00871 R1 = (S1 >> 1u) + (T0 >> 1u); 00872 00873 /* S0 = (ya-yc) - (xb- xd), S1 = (xa-xc) + (yb-yd)) */ 00874 S0 = (S0 >> 1u) + (T1 >> 1u); 00875 S1 = (S1 >> 1u) - (T0 >> 1u); 00876 00877 /* Butterfly process for the i0+fftLen/2 sample */ 00878 out1 = (short) ((Co1 * S0 + Si1 * S1) >> 16u); 00879 00880 out2 = (short) ((-Si1 * S0 + Co1 * S1) >> 16u); 00881 00882 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 00883 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 00884 pSrc16[i2 * 2u] = out1; 00885 pSrc16[(i2 * 2u) + 1u] = out2; 00886 00887 /* Butterfly process for the i0+3fftLen/4 sample */ 00888 out1 = (short) ((Si3 * R1 + Co3 * R0) >> 16u); 00889 00890 out2 = (short) ((-Si3 * R0 + Co3 * R1) >> 16u); 00891 /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */ 00892 /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */ 00893 pSrc16[i3 * 2u] = out1; 00894 pSrc16[(i3 * 2u) + 1u] = out2; 00895 } 00896 } 00897 /* Twiddle coefficients index modifier */ 00898 twidCoefModifier <<= 2u; 00899 } 00900 /* end of middle stage process */ 00901 00902 00903 /* data is in 10.6(q6) format for the 1024 point */ 00904 /* data is in 8.8(q8) format for the 256 point */ 00905 /* data is in 6.10(q10) format for the 64 point */ 00906 /* data is in 4.12(q12) format for the 16 point */ 00907 00908 /* Initializations for the last stage */ 00909 n1 = n2; 00910 n2 >>= 2u; 00911 00912 /* start of last stage process */ 00913 00914 /* Butterfly implementation */ 00915 for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1) 00916 { 00917 /* index calculation for the input as, */ 00918 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ 00919 i1 = i0 + n2; 00920 i2 = i1 + n2; 00921 i3 = i2 + n2; 00922 00923 /* Reading i0, i0+fftLen/2 inputs */ 00924 /* Read ya (real), xa(imag) input */ 00925 T0 = pSrc16[i0 * 2u]; 00926 T1 = pSrc16[(i0 * 2u) + 1u]; 00927 00928 /* Read yc (real), xc(imag) input */ 00929 S0 = pSrc16[i2 * 2u]; 00930 S1 = pSrc16[(i2 * 2u) + 1u]; 00931 00932 /* R0 = (ya + yc), R1 = (xa + xc) */ 00933 R0 = __SSAT(T0 + S0, 16u); 00934 R1 = __SSAT(T1 + S1, 16u); 00935 00936 /* S0 = (ya - yc), S1 = (xa - xc) */ 00937 S0 = __SSAT(T0 - S0, 16u); 00938 S1 = __SSAT(T1 - S1, 16u); 00939 00940 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 00941 /* Read yb (real), xb(imag) input */ 00942 T0 = pSrc16[i1 * 2u]; 00943 T1 = pSrc16[(i1 * 2u) + 1u]; 00944 /* Read yd (real), xd(imag) input */ 00945 U0 = pSrc16[i3 * 2u]; 00946 U1 = pSrc16[(i3 * 2u) + 1u]; 00947 00948 /* T0 = (yb + yd), T1 = (xb + xd)) */ 00949 T0 = __SSAT(T0 + U0, 16u); 00950 T1 = __SSAT(T1 + U1, 16u); 00951 00952 /* writing the butterfly processed i0 sample */ 00953 /* xa' = xa + xb + xc + xd */ 00954 /* ya' = ya + yb + yc + yd */ 00955 pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u); 00956 pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u); 00957 00958 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */ 00959 R0 = (R0 >> 1u) - (T0 >> 1u); 00960 R1 = (R1 >> 1u) - (T1 >> 1u); 00961 /* Read yb (real), xb(imag) input */ 00962 T0 = pSrc16[i1 * 2u]; 00963 T1 = pSrc16[(i1 * 2u) + 1u]; 00964 00965 /* writing the butterfly processed i0 + fftLen/4 sample */ 00966 /* xc' = (xa-xb+xc-xd) */ 00967 /* yc' = (ya-yb+yc-yd) */ 00968 pSrc16[i1 * 2u] = R0; 00969 pSrc16[(i1 * 2u) + 1u] = R1; 00970 00971 /* Read yd (real), xd(imag) input */ 00972 U0 = pSrc16[i3 * 2u]; 00973 U1 = pSrc16[(i3 * 2u) + 1u]; 00974 /* T0 = (yb - yd), T1 = (xb - xd) */ 00975 T0 = __SSAT(T0 - U0, 16u); 00976 T1 = __SSAT(T1 - U1, 16u); 00977 00978 /* writing the butterfly processed i0 + fftLen/2 sample */ 00979 /* xb' = (xa+yb-xc-yd) */ 00980 /* yb' = (ya-xb-yc+xd) */ 00981 pSrc16[i2 * 2u] = (S0 >> 1u) + (T1 >> 1u); 00982 pSrc16[(i2 * 2u) + 1u] = (S1 >> 1u) - (T0 >> 1u); 00983 00984 /* writing the butterfly processed i0 + 3fftLen/4 sample */ 00985 /* xd' = (xa-yb-xc+yd) */ 00986 /* yd' = (ya+xb-yc-xd) */ 00987 pSrc16[i3 * 2u] = (S0 >> 1u) - (T1 >> 1u); 00988 pSrc16[(i3 * 2u) + 1u] = (S1 >> 1u) + (T0 >> 1u); 00989 00990 } 00991 00992 /* end of last stage process */ 00993 00994 /* output is in 11.5(q5) format for the 1024 point */ 00995 /* output is in 9.7(q7) format for the 256 point */ 00996 /* output is in 7.9(q9) format for the 64 point */ 00997 /* output is in 5.11(q11) format for the 16 point */ 00998 00999 #endif /* #ifndef ARM_MATH_CM0 */ 01000 01001 } 01002 01003 01013 /* 01014 * Radix-4 IFFT algorithm used is : 01015 * 01016 * CIFFT uses same twiddle coefficients as CFFT function 01017 * x[k] = x[n] + (j)k * x[n + fftLen/4] + (-1)k * x[n+fftLen/2] + (-j)k * x[n+3*fftLen/4] 01018 * 01019 * 01020 * IFFT is implemented with following changes in equations from FFT 01021 * 01022 * Input real and imaginary data: 01023 * x(n) = xa + j * ya 01024 * x(n+N/4 ) = xb + j * yb 01025 * x(n+N/2 ) = xc + j * yc 01026 * x(n+3N 4) = xd + j * yd 01027 * 01028 * 01029 * Output real and imaginary data: 01030 * x(4r) = xa'+ j * ya' 01031 * x(4r+1) = xb'+ j * yb' 01032 * x(4r+2) = xc'+ j * yc' 01033 * x(4r+3) = xd'+ j * yd' 01034 * 01035 * 01036 * Twiddle factors for radix-4 IFFT: 01037 * Wn = co1 + j * (si1) 01038 * W2n = co2 + j * (si2) 01039 * W3n = co3 + j * (si3) 01040 01041 * The real and imaginary output values for the radix-4 butterfly are 01042 * xa' = xa + xb + xc + xd 01043 * ya' = ya + yb + yc + yd 01044 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) 01045 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) 01046 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) 01047 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) 01048 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) 01049 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) 01050 * 01051 */ 01052 01053 void arm_radix4_butterfly_inverse_q15( 01054 q15_t * pSrc16, 01055 uint32_t fftLen, 01056 q15_t * pCoef16, 01057 uint32_t twidCoefModifier) 01058 { 01059 01060 #ifndef ARM_MATH_CM0 01061 01062 /* Run the below code for Cortex-M4 and Cortex-M3 */ 01063 01064 q31_t R, S, T, U; 01065 q31_t C1, C2, C3, out1, out2; 01066 q31_t *pSrc, *pCoeff; 01067 uint32_t n1, n2, ic, i0, i1, i2, i3, j, k; 01068 q15_t in; 01069 01070 /* Total process is divided into three stages */ 01071 01072 /* process first stage, middle stages, & last stage */ 01073 01074 /* pointer initializations for SIMD calculations */ 01075 pSrc = (q31_t *) pSrc16; 01076 pCoeff = (q31_t *) pCoef16; 01077 01078 /* Initializations for the first stage */ 01079 n2 = fftLen; 01080 n1 = n2; 01081 01082 /* n2 = fftLen/4 */ 01083 n2 >>= 2u; 01084 01085 /* Index for twiddle coefficient */ 01086 ic = 0u; 01087 01088 /* Index for input read and output write */ 01089 i0 = 0u; 01090 01091 j = n2; 01092 01093 /* Input is in 1.15(q15) format */ 01094 01095 /* Start of first stage process */ 01096 do 01097 { 01098 /* Butterfly implementation */ 01099 01100 /* index calculation for the input as, */ 01101 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */ 01102 i1 = i0 + n2; 01103 i2 = i1 + n2; 01104 i3 = i2 + n2; 01105 01106 /* Reading i0, i0+fftLen/2 inputs */ 01107 /* Read ya (real), xa(imag) input */ 01108 T = pSrc[i0]; 01109 in = ((int16_t) (T & 0xFFFF)) >> 2; 01110 T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF); 01111 /* Read yc (real), xc(imag) input */ 01112 S = pSrc[i2]; 01113 in = ((int16_t) (S & 0xFFFF)) >> 2; 01114 S = ((S >> 2) & 0xFFFF0000) | (in & 0xFFFF); 01115 01116 /* R = packed((ya + yc), (xa + xc) ) */ 01117 R = __QADD16(T, S); 01118 /* S = packed((ya - yc), (xa - xc) ) */ 01119 S = __QSUB16(T, S); 01120 01121 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 01122 /* Read yb (real), xb(imag) input */ 01123 T = pSrc[i1]; 01124 in = ((int16_t) (T & 0xFFFF)) >> 2; 01125 T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF); 01126 /* Read yd (real), xd(imag) input */ 01127 U = pSrc[i3]; 01128 in = ((int16_t) (U & 0xFFFF)) >> 2; 01129 U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF); 01130 01131 /* T = packed((yb + yd), (xb + xd) ) */ 01132 T = __QADD16(T, U); 01133 01134 /* writing the butterfly processed i0 sample */ 01135 /* xa' = xa + xb + xc + xd */ 01136 /* ya' = ya + yb + yc + yd */ 01137 pSrc[i0] = __SHADD16(R, T); 01138 01139 /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */ 01140 R = __QSUB16(R, T); 01141 /* co2 & si2 are read from SIMD Coefficient pointer */ 01142 C2 = pCoeff[2u * ic]; 01143 01144 #ifndef ARM_MATH_BIG_ENDIAN 01145 01146 /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */ 01147 out1 = __SMUSD(C2, R) >> 16u; 01148 /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */ 01149 out2 = __SMUADX(C2, R); 01150 01151 #else 01152 01153 /* xc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */ 01154 out1 = __SMUADX(C2, R) >> 16u; 01155 /* yc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */ 01156 out2 = __SMUSD(-C2, R); 01157 01158 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01159 01160 /* Reading i0+fftLen/4 */ 01161 /* T = packed(yb, xb) */ 01162 T = pSrc[i1]; 01163 in = ((int16_t) (T & 0xFFFF)) >> 2; 01164 T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF); 01165 01166 /* writing the butterfly processed i0 + fftLen/4 sample */ 01167 /* writing output(xc', yc') in little endian format */ 01168 pSrc[i1] = (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 01169 01170 /* Butterfly calculations */ 01171 /* U = packed(yd, xd) */ 01172 U = pSrc[i3]; 01173 in = ((int16_t) (U & 0xFFFF)) >> 2; 01174 U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF); 01175 01176 /* T = packed(yb-yd, xb-xd) */ 01177 T = __QSUB16(T, U); 01178 01179 #ifndef ARM_MATH_BIG_ENDIAN 01180 01181 /* R = packed((ya-yc) - (xb- xd) , (xa-xc) + (yb-yd)) */ 01182 R = __QSAX(S, T); 01183 /* S = packed((ya-yc) + (xb- xd), (xa-xc) - (yb-yd)) */ 01184 S = __QASX(S, T); 01185 01186 #else 01187 01188 /* R = packed((ya-yc) - (xb- xd) , (xa-xc) + (yb-yd)) */ 01189 R = __QASX(S, T); 01190 /* S = packed((ya-yc) + (xb- xd), (xa-xc) - (yb-yd)) */ 01191 S = __QSAX(S, T); 01192 01193 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01194 01195 /* co1 & si1 are read from SIMD Coefficient pointer */ 01196 C1 = pCoeff[ic]; 01197 /* Butterfly process for the i0+fftLen/2 sample */ 01198 01199 #ifndef ARM_MATH_BIG_ENDIAN 01200 01201 /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */ 01202 out1 = __SMUSD(C1, S) >> 16u; 01203 /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */ 01204 out2 = __SMUADX(C1, S); 01205 01206 #else 01207 01208 /* xb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */ 01209 out1 = __SMUADX(C1, S) >> 16u; 01210 /* yb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */ 01211 out2 = __SMUSD(-C1, S); 01212 01213 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01214 01215 /* writing output(xb', yb') in little endian format */ 01216 pSrc[i2] = ((out2) & 0xFFFF0000) | ((out1) & 0x0000FFFF); 01217 01218 /* co3 & si3 are read from SIMD Coefficient pointer */ 01219 C3 = pCoeff[3u * ic]; 01220 /* Butterfly process for the i0+3fftLen/4 sample */ 01221 01222 #ifndef ARM_MATH_BIG_ENDIAN 01223 01224 /* xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) */ 01225 out1 = __SMUSD(C3, R) >> 16u; 01226 /* yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) */ 01227 out2 = __SMUADX(C3, R); 01228 01229 #else 01230 01231 /* xd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) */ 01232 out1 = __SMUADX(C3, R) >> 16u; 01233 /* yd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) */ 01234 out2 = __SMUSD(-C3, R); 01235 01236 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01237 01238 /* writing output(xd', yd') in little endian format */ 01239 pSrc[i3] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 01240 01241 /* Twiddle coefficients index modifier */ 01242 ic = ic + twidCoefModifier; 01243 01244 /* Updating input index */ 01245 i0 = i0 + 1u; 01246 01247 } while(--j); 01248 01249 /* End of first stage process */ 01250 01251 /* data is in 4.11(q11) format */ 01252 01253 01254 /* Start of Middle stage process */ 01255 01256 /* Twiddle coefficients index modifier */ 01257 twidCoefModifier <<= 2u; 01258 01259 /* Calculation of Middle stage */ 01260 for (k = fftLen / 4u; k > 4u; k >>= 2u) 01261 { 01262 /* Initializations for the middle stage */ 01263 n1 = n2; 01264 n2 >>= 2u; 01265 ic = 0u; 01266 01267 for (j = 0u; j <= (n2 - 1u); j++) 01268 { 01269 /* index calculation for the coefficients */ 01270 C1 = pCoeff[ic]; 01271 C2 = pCoeff[2u * ic]; 01272 C3 = pCoeff[3u * ic]; 01273 01274 /* Twiddle coefficients index modifier */ 01275 ic = ic + twidCoefModifier; 01276 01277 /* Butterfly implementation */ 01278 for (i0 = j; i0 < fftLen; i0 += n1) 01279 { 01280 /* index calculation for the input as, */ 01281 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */ 01282 i1 = i0 + n2; 01283 i2 = i1 + n2; 01284 i3 = i2 + n2; 01285 01286 /* Reading i0, i0+fftLen/2 inputs */ 01287 /* Read ya (real), xa(imag) input */ 01288 T = pSrc[i0]; 01289 01290 /* Read yc (real), xc(imag) input */ 01291 S = pSrc[i2]; 01292 01293 01294 /* R = packed( (ya + yc), (xa + xc)) */ 01295 R = __QADD16(T, S); 01296 /* S = packed((ya - yc), (xa - xc)) */ 01297 S = __QSUB16(T, S); 01298 01299 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 01300 /* Read yb (real), xb(imag) input */ 01301 T = pSrc[i1]; 01302 01303 /* Read yd (real), xd(imag) input */ 01304 U = pSrc[i3]; 01305 01306 01307 /* T = packed( (yb + yd), (xb + xd)) */ 01308 T = __QADD16(T, U); 01309 01310 /* writing the butterfly processed i0 sample */ 01311 /* xa' = xa + xb + xc + xd */ 01312 /* ya' = ya + yb + yc + yd */ 01313 out1 = __SHADD16(R, T); 01314 in = ((int16_t) (out1 & 0xFFFF)) >> 1; 01315 out1 = ((out1 >> 1) & 0xFFFF0000) | (in & 0xFFFF); 01316 pSrc[i0] = out1; 01317 01318 01319 01320 /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */ 01321 R = __SHSUB16(R, T); 01322 01323 01324 #ifndef ARM_MATH_BIG_ENDIAN 01325 01326 /* (ya-yb+yc-yd)* (si2) - (xa-xb+xc-xd)* co2 */ 01327 out1 = __SMUSD(C2, R) >> 16u; 01328 /* (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */ 01329 out2 = __SMUADX(C2, R); 01330 01331 #else 01332 01333 /* (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */ 01334 out1 = __SMUADX(R, C2) >> 16u; 01335 /* (ya-yb+yc-yd)* (si2) - (xa-xb+xc-xd)* co2 */ 01336 out2 = __SMUSD(-C2, R); 01337 01338 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01339 01340 /* Reading i0+3fftLen/4 */ 01341 /* Read yb (real), xb(imag) input */ 01342 T = pSrc[i1]; 01343 01344 /* writing the butterfly processed i0 + fftLen/4 sample */ 01345 /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */ 01346 /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */ 01347 pSrc[i1] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 01348 01349 /* Butterfly calculations */ 01350 /* Read yd (real), xd(imag) input */ 01351 U = pSrc[i3]; 01352 01353 /* T = packed(yb-yd, xb-xd) */ 01354 T = __QSUB16(T, U); 01355 01356 01357 #ifndef ARM_MATH_BIG_ENDIAN 01358 01359 /* R = packed((ya-yc) - (xb- xd) , (xa-xc) + (yb-yd)) */ 01360 R = __SHSAX(S, T); 01361 01362 /* S = packed((ya-yc) + (xb- xd), (xa-xc) - (yb-yd)) */ 01363 S = __SHASX(S, T); 01364 /* Butterfly process for the i0+fftLen/2 sample */ 01365 out1 = __SMUSD(C1, S) >> 16u; 01366 out2 = __SMUADX(C1, S); 01367 01368 #else 01369 01370 /* R = packed((ya-yc) - (xb- xd) , (xa-xc) + (yb-yd)) */ 01371 R = __SHASX(S, T); 01372 01373 /* S = packed((ya-yc) + (xb- xd), (xa-xc) - (yb-yd)) */ 01374 S = __SHSAX(S, T); 01375 /* Butterfly process for the i0+fftLen/2 sample */ 01376 out1 = __SMUADX(S, C1) >> 16u; 01377 out2 = __SMUSD(-C1, S); 01378 01379 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01380 01381 /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */ 01382 /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */ 01383 pSrc[i2] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 01384 01385 /* Butterfly process for the i0+3fftLen/4 sample */ 01386 01387 #ifndef ARM_MATH_BIG_ENDIAN 01388 01389 out1 = __SMUSD(C3, R) >> 16u; 01390 out2 = __SMUADX(C3, R); 01391 01392 #else 01393 01394 out1 = __SMUADX(C3, R) >> 16u; 01395 out2 = __SMUSD(-C3, R); 01396 01397 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01398 01399 /* xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) */ 01400 /* yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) */ 01401 pSrc[i3] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 01402 01403 01404 } 01405 } 01406 /* Twiddle coefficients index modifier */ 01407 twidCoefModifier <<= 2u; 01408 } 01409 /* End of Middle stages process */ 01410 01411 01412 /* data is in 10.6(q6) format for the 1024 point */ 01413 /* data is in 8.8(q8) format for the 256 point */ 01414 /* data is in 6.10(q10) format for the 64 point */ 01415 /* data is in 4.12(q12) format for the 16 point */ 01416 01417 /* start of last stage process */ 01418 01419 01420 /* Initializations for the last stage */ 01421 n1 = n2; 01422 n2 >>= 2u; 01423 01424 /* Butterfly implementation */ 01425 for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1) 01426 { 01427 /* index calculation for the input as, */ 01428 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */ 01429 i1 = i0 + n2; 01430 i2 = i1 + n2; 01431 i3 = i2 + n2; 01432 01433 /* Reading i0, i0+fftLen/2 inputs */ 01434 /* Read ya (real), xa(imag) input */ 01435 T = pSrc[i0]; 01436 /* Read yc (real), xc(imag) input */ 01437 S = pSrc[i2]; 01438 01439 /* R = packed((ya + yc), (xa + xc)) */ 01440 R = __QADD16(T, S); 01441 /* S = packed((ya - yc), (xa - xc)) */ 01442 S = __QSUB16(T, S); 01443 01444 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 01445 /* Read yb (real), xb(imag) input */ 01446 T = pSrc[i1]; 01447 /* Read yd (real), xd(imag) input */ 01448 U = pSrc[i3]; 01449 01450 /* T = packed((yb + yd), (xb + xd)) */ 01451 T = __QADD16(T, U); 01452 01453 /* writing the butterfly processed i0 sample */ 01454 /* xa' = xa + xb + xc + xd */ 01455 /* ya' = ya + yb + yc + yd */ 01456 pSrc[i0] = __SHADD16(R, T); 01457 01458 /* R = packed((ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */ 01459 R = __SHSUB16(R, T); 01460 01461 /* Read yb (real), xb(imag) input */ 01462 T = pSrc[i1]; 01463 01464 /* writing the butterfly processed i0 + fftLen/4 sample */ 01465 /* xc' = (xa-xb+xc-xd) */ 01466 /* yc' = (ya-yb+yc-yd) */ 01467 pSrc[i1] = R; 01468 01469 /* Read yd (real), xd(imag) input */ 01470 U = pSrc[i3]; 01471 /* T = packed( (yb - yd), (xb - xd)) */ 01472 T = __QSUB16(T, U); 01473 01474 01475 #ifndef ARM_MATH_BIG_ENDIAN 01476 01477 /* writing the butterfly processed i0 + fftLen/2 sample */ 01478 /* xb' = (xa-yb-xc+yd) */ 01479 /* yb' = (ya+xb-yc-xd) */ 01480 pSrc[i2] = __SHASX(S, T); 01481 01482 /* writing the butterfly processed i0 + 3fftLen/4 sample */ 01483 /* xd' = (xa+yb-xc-yd) */ 01484 /* yd' = (ya-xb-yc+xd) */ 01485 pSrc[i3] = __SHSAX(S, T); 01486 01487 01488 #else 01489 01490 /* writing the butterfly processed i0 + fftLen/2 sample */ 01491 /* xb' = (xa-yb-xc+yd) */ 01492 /* yb' = (ya+xb-yc-xd) */ 01493 pSrc[i2] = __SHSAX(S, T); 01494 01495 /* writing the butterfly processed i0 + 3fftLen/4 sample */ 01496 /* xd' = (xa+yb-xc-yd) */ 01497 /* yd' = (ya-xb-yc+xd) */ 01498 pSrc[i3] = __SHASX(S, T); 01499 01500 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01501 01502 } 01503 /* end of last stage process */ 01504 01505 /* output is in 11.5(q5) format for the 1024 point */ 01506 /* output is in 9.7(q7) format for the 256 point */ 01507 /* output is in 7.9(q9) format for the 64 point */ 01508 /* output is in 5.11(q11) format for the 16 point */ 01509 01510 01511 #else 01512 01513 /* Run the below code for Cortex-M0 */ 01514 01515 q15_t R0, R1, S0, S1, T0, T1, U0, U1; 01516 q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2; 01517 uint32_t n1, n2, ic, i0, i1, i2, i3, j, k; 01518 01519 /* Total process is divided into three stages */ 01520 01521 /* process first stage, middle stages, & last stage */ 01522 01523 /* Initializations for the first stage */ 01524 n2 = fftLen; 01525 n1 = n2; 01526 01527 /* n2 = fftLen/4 */ 01528 n2 >>= 2u; 01529 01530 /* Index for twiddle coefficient */ 01531 ic = 0u; 01532 01533 /* Index for input read and output write */ 01534 i0 = 0u; 01535 01536 j = n2; 01537 01538 /* Input is in 1.15(q15) format */ 01539 01540 /* Start of first stage process */ 01541 do 01542 { 01543 /* Butterfly implementation */ 01544 01545 /* index calculation for the input as, */ 01546 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ 01547 i1 = i0 + n2; 01548 i2 = i1 + n2; 01549 i3 = i2 + n2; 01550 01551 /* Reading i0, i0+fftLen/2 inputs */ 01552 /* input is down scale by 4 to avoid overflow */ 01553 /* Read ya (real), xa(imag) input */ 01554 T0 = pSrc16[i0 * 2u] >> 2u; 01555 T1 = pSrc16[(i0 * 2u) + 1u] >> 2u; 01556 /* input is down scale by 4 to avoid overflow */ 01557 /* Read yc (real), xc(imag) input */ 01558 S0 = pSrc16[i2 * 2u] >> 2u; 01559 S1 = pSrc16[(i2 * 2u) + 1u] >> 2u; 01560 01561 /* R0 = (ya + yc), R1 = (xa + xc) */ 01562 R0 = __SSAT(T0 + S0, 16u); 01563 R1 = __SSAT(T1 + S1, 16u); 01564 /* S0 = (ya - yc), S1 = (xa - xc) */ 01565 S0 = __SSAT(T0 - S0, 16u); 01566 S1 = __SSAT(T1 - S1, 16u); 01567 01568 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 01569 /* input is down scale by 4 to avoid overflow */ 01570 /* Read yb (real), xb(imag) input */ 01571 T0 = pSrc16[i1 * 2u] >> 2u; 01572 T1 = pSrc16[(i1 * 2u) + 1u] >> 2u; 01573 /* Read yd (real), xd(imag) input */ 01574 /* input is down scale by 4 to avoid overflow */ 01575 U0 = pSrc16[i3 * 2u] >> 2u; 01576 U1 = pSrc16[(i3 * 2u) + 1u] >> 2u; 01577 01578 /* T0 = (yb + yd), T1 = (xb + xd) */ 01579 T0 = __SSAT(T0 + U0, 16u); 01580 T1 = __SSAT(T1 + U1, 16u); 01581 01582 /* writing the butterfly processed i0 sample */ 01583 /* xa' = xa + xb + xc + xd */ 01584 /* ya' = ya + yb + yc + yd */ 01585 pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u); 01586 pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u); 01587 01588 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc)- (xb + xd) */ 01589 R0 = __SSAT(R0 - T0, 16u); 01590 R1 = __SSAT(R1 - T1, 16u); 01591 /* co2 & si2 are read from Coefficient pointer */ 01592 Co2 = pCoef16[2u * ic * 2u]; 01593 Si2 = pCoef16[(2u * ic * 2u) + 1u]; 01594 /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */ 01595 out1 = (short) ((Co2 * R0 - Si2 * R1) >> 16u); 01596 /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */ 01597 out2 = (short) ((Si2 * R0 + Co2 * R1) >> 16u); 01598 01599 /* Reading i0+fftLen/4 */ 01600 /* input is down scale by 4 to avoid overflow */ 01601 /* T0 = yb, T1 = xb */ 01602 T0 = pSrc16[i1 * 2u] >> 2u; 01603 T1 = pSrc16[(i1 * 2u) + 1u] >> 2u; 01604 01605 /* writing the butterfly processed i0 + fftLen/4 sample */ 01606 /* writing output(xc', yc') in little endian format */ 01607 pSrc16[i1 * 2u] = out1; 01608 pSrc16[(i1 * 2u) + 1u] = out2; 01609 01610 /* Butterfly calculations */ 01611 /* input is down scale by 4 to avoid overflow */ 01612 /* U0 = yd, U1 = xd) */ 01613 U0 = pSrc16[i3 * 2u] >> 2u; 01614 U1 = pSrc16[(i3 * 2u) + 1u] >> 2u; 01615 01616 /* T0 = yb-yd, T1 = xb-xd) */ 01617 T0 = __SSAT(T0 - U0, 16u); 01618 T1 = __SSAT(T1 - U1, 16u); 01619 /* R0 = (ya-yc) - (xb- xd) , R1 = (xa-xc) + (yb-yd) */ 01620 R0 = (short) __SSAT((q31_t) (S0 + T1), 16); 01621 R1 = (short) __SSAT((q31_t) (S1 - T0), 16); 01622 /* S = (ya-yc) + (xb- xd), S1 = (xa-xc) - (yb-yd) */ 01623 S0 = (short) __SSAT((q31_t) (S0 - T1), 16); 01624 S1 = (short) __SSAT((q31_t) (S1 + T0), 16); 01625 01626 /* co1 & si1 are read from Coefficient pointer */ 01627 Co1 = pCoef16[ic * 2u]; 01628 Si1 = pCoef16[(ic * 2u) + 1u]; 01629 /* Butterfly process for the i0+fftLen/2 sample */ 01630 /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */ 01631 out1 = (short) ((Co1 * S0 - Si1 * S1) >> 16u); 01632 /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */ 01633 out2 = (short) ((Si1 * S0 + Co1 * S1) >> 16u); 01634 /* writing output(xb', yb') in little endian format */ 01635 pSrc16[i2 * 2u] = out1; 01636 pSrc16[(i2 * 2u) + 1u] = out2; 01637 01638 /* Co3 & si3 are read from Coefficient pointer */ 01639 Co3 = pCoef16[3u * ic * 2u]; 01640 Si3 = pCoef16[(3u * ic * 2u) + 1u]; 01641 /* Butterfly process for the i0+3fftLen/4 sample */ 01642 /* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */ 01643 out1 = (short) ((Co3 * R0 - Si3 * R1) >> 16u); 01644 /* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */ 01645 out2 = (short) ((Si3 * R0 + Co3 * R1) >> 16u); 01646 /* writing output(xd', yd') in little endian format */ 01647 pSrc16[i3 * 2u] = out1; 01648 pSrc16[(i3 * 2u) + 1u] = out2; 01649 01650 /* Twiddle coefficients index modifier */ 01651 ic = ic + twidCoefModifier; 01652 01653 /* Updating input index */ 01654 i0 = i0 + 1u; 01655 01656 } while(--j); 01657 01658 /* End of first stage process */ 01659 01660 /* data is in 4.11(q11) format */ 01661 01662 01663 /* Start of Middle stage process */ 01664 01665 /* Twiddle coefficients index modifier */ 01666 twidCoefModifier <<= 2u; 01667 01668 /* Calculation of Middle stage */ 01669 for (k = fftLen / 4u; k > 4u; k >>= 2u) 01670 { 01671 /* Initializations for the middle stage */ 01672 n1 = n2; 01673 n2 >>= 2u; 01674 ic = 0u; 01675 01676 for (j = 0u; j <= (n2 - 1u); j++) 01677 { 01678 /* index calculation for the coefficients */ 01679 Co1 = pCoef16[ic * 2u]; 01680 Si1 = pCoef16[(ic * 2u) + 1u]; 01681 Co2 = pCoef16[2u * ic * 2u]; 01682 Si2 = pCoef16[2u * ic * 2u + 1u]; 01683 Co3 = pCoef16[3u * ic * 2u]; 01684 Si3 = pCoef16[(3u * ic * 2u) + 1u]; 01685 01686 /* Twiddle coefficients index modifier */ 01687 ic = ic + twidCoefModifier; 01688 01689 /* Butterfly implementation */ 01690 for (i0 = j; i0 < fftLen; i0 += n1) 01691 { 01692 /* index calculation for the input as, */ 01693 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ 01694 i1 = i0 + n2; 01695 i2 = i1 + n2; 01696 i3 = i2 + n2; 01697 01698 /* Reading i0, i0+fftLen/2 inputs */ 01699 /* Read ya (real), xa(imag) input */ 01700 T0 = pSrc16[i0 * 2u]; 01701 T1 = pSrc16[(i0 * 2u) + 1u]; 01702 01703 /* Read yc (real), xc(imag) input */ 01704 S0 = pSrc16[i2 * 2u]; 01705 S1 = pSrc16[(i2 * 2u) + 1u]; 01706 01707 01708 /* R0 = (ya + yc), R1 = (xa + xc) */ 01709 R0 = __SSAT(T0 + S0, 16u); 01710 R1 = __SSAT(T1 + S1, 16u); 01711 /* S0 = (ya - yc), S1 = (xa - xc) */ 01712 S0 = __SSAT(T0 - S0, 16u); 01713 S1 = __SSAT(T1 - S1, 16u); 01714 01715 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 01716 /* Read yb (real), xb(imag) input */ 01717 T0 = pSrc16[i1 * 2u]; 01718 T1 = pSrc16[(i1 * 2u) + 1u]; 01719 01720 /* Read yd (real), xd(imag) input */ 01721 U0 = pSrc16[i3 * 2u]; 01722 U1 = pSrc16[(i3 * 2u) + 1u]; 01723 01724 /* T0 = (yb + yd), T1 = (xb + xd) */ 01725 T0 = __SSAT(T0 + U0, 16u); 01726 T1 = __SSAT(T1 + U1, 16u); 01727 01728 /* writing the butterfly processed i0 sample */ 01729 /* xa' = xa + xb + xc + xd */ 01730 /* ya' = ya + yb + yc + yd */ 01731 pSrc16[i0 * 2u] = ((R0 >> 1u) + (T0 >> 1u)) >> 1u; 01732 pSrc16[(i0 * 2u) + 1u] = ((R1 >> 1u) + (T1 >> 1u)) >> 1u; 01733 01734 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */ 01735 R0 = (R0 >> 1u) - (T0 >> 1u); 01736 R1 = (R1 >> 1u) - (T1 >> 1u); 01737 01738 /* (ya-yb+yc-yd)* (si2) - (xa-xb+xc-xd)* co2 */ 01739 out1 = (short) ((Co2 * R0 - Si2 * R1) >> 16); 01740 /* (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */ 01741 out2 = (short) ((Si2 * R0 + Co2 * R1) >> 16); 01742 01743 /* Reading i0+3fftLen/4 */ 01744 /* Read yb (real), xb(imag) input */ 01745 T0 = pSrc16[i1 * 2u]; 01746 T1 = pSrc16[(i1 * 2u) + 1u]; 01747 01748 /* writing the butterfly processed i0 + fftLen/4 sample */ 01749 /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */ 01750 /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */ 01751 pSrc16[i1 * 2u] = out1; 01752 pSrc16[(i1 * 2u) + 1u] = out2; 01753 01754 /* Butterfly calculations */ 01755 /* Read yd (real), xd(imag) input */ 01756 U0 = pSrc16[i3 * 2u]; 01757 U1 = pSrc16[(i3 * 2u) + 1u]; 01758 01759 /* T0 = yb-yd, T1 = xb-xd) */ 01760 T0 = __SSAT(T0 - U0, 16u); 01761 T1 = __SSAT(T1 - U1, 16u); 01762 01763 /* R0 = (ya-yc) - (xb- xd) , R1 = (xa-xc) + (yb-yd) */ 01764 R0 = (S0 >> 1u) + (T1 >> 1u); 01765 R1 = (S1 >> 1u) - (T0 >> 1u); 01766 01767 /* S1 = (ya-yc) + (xb- xd), S1 = (xa-xc) - (yb-yd) */ 01768 S0 = (S0 >> 1u) - (T1 >> 1u); 01769 S1 = (S1 >> 1u) + (T0 >> 1u); 01770 01771 /* Butterfly process for the i0+fftLen/2 sample */ 01772 out1 = (short) ((Co1 * S0 - Si1 * S1) >> 16u); 01773 out2 = (short) ((Si1 * S0 + Co1 * S1) >> 16u); 01774 /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */ 01775 /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */ 01776 pSrc16[i2 * 2u] = out1; 01777 pSrc16[(i2 * 2u) + 1u] = out2; 01778 01779 /* Butterfly process for the i0+3fftLen/4 sample */ 01780 out1 = (short) ((Co3 * R0 - Si3 * R1) >> 16u); 01781 01782 out2 = (short) ((Si3 * R0 + Co3 * R1) >> 16u); 01783 /* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */ 01784 /* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */ 01785 pSrc16[i3 * 2u] = out1; 01786 pSrc16[(i3 * 2u) + 1u] = out2; 01787 01788 01789 } 01790 } 01791 /* Twiddle coefficients index modifier */ 01792 twidCoefModifier <<= 2u; 01793 } 01794 /* End of Middle stages process */ 01795 01796 01797 /* data is in 10.6(q6) format for the 1024 point */ 01798 /* data is in 8.8(q8) format for the 256 point */ 01799 /* data is in 6.10(q10) format for the 64 point */ 01800 /* data is in 4.12(q12) format for the 16 point */ 01801 01802 /* start of last stage process */ 01803 01804 01805 /* Initializations for the last stage */ 01806 n1 = n2; 01807 n2 >>= 2u; 01808 01809 /* Butterfly implementation */ 01810 for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1) 01811 { 01812 /* index calculation for the input as, */ 01813 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ 01814 i1 = i0 + n2; 01815 i2 = i1 + n2; 01816 i3 = i2 + n2; 01817 01818 /* Reading i0, i0+fftLen/2 inputs */ 01819 /* Read ya (real), xa(imag) input */ 01820 T0 = pSrc16[i0 * 2u]; 01821 T1 = pSrc16[(i0 * 2u) + 1u]; 01822 /* Read yc (real), xc(imag) input */ 01823 S0 = pSrc16[i2 * 2u]; 01824 S1 = pSrc16[(i2 * 2u) + 1u]; 01825 01826 /* R0 = (ya + yc), R1 = (xa + xc) */ 01827 R0 = __SSAT(T0 + S0, 16u); 01828 R1 = __SSAT(T1 + S1, 16u); 01829 /* S0 = (ya - yc), S1 = (xa - xc) */ 01830 S0 = __SSAT(T0 - S0, 16u); 01831 S1 = __SSAT(T1 - S1, 16u); 01832 01833 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 01834 /* Read yb (real), xb(imag) input */ 01835 T0 = pSrc16[i1 * 2u]; 01836 T1 = pSrc16[(i1 * 2u) + 1u]; 01837 /* Read yd (real), xd(imag) input */ 01838 U0 = pSrc16[i3 * 2u]; 01839 U1 = pSrc16[(i3 * 2u) + 1u]; 01840 01841 /* T0 = (yb + yd), T1 = (xb + xd) */ 01842 T0 = __SSAT(T0 + U0, 16u); 01843 T1 = __SSAT(T1 + U1, 16u); 01844 01845 /* writing the butterfly processed i0 sample */ 01846 /* xa' = xa + xb + xc + xd */ 01847 /* ya' = ya + yb + yc + yd */ 01848 pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u); 01849 pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u); 01850 01851 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */ 01852 R0 = (R0 >> 1u) - (T0 >> 1u); 01853 R1 = (R1 >> 1u) - (T1 >> 1u); 01854 01855 /* Read yb (real), xb(imag) input */ 01856 T0 = pSrc16[i1 * 2u]; 01857 T1 = pSrc16[(i1 * 2u) + 1u]; 01858 01859 /* writing the butterfly processed i0 + fftLen/4 sample */ 01860 /* xc' = (xa-xb+xc-xd) */ 01861 /* yc' = (ya-yb+yc-yd) */ 01862 pSrc16[i1 * 2u] = R0; 01863 pSrc16[(i1 * 2u) + 1u] = R1; 01864 01865 /* Read yd (real), xd(imag) input */ 01866 U0 = pSrc16[i3 * 2u]; 01867 U1 = pSrc16[(i3 * 2u) + 1u]; 01868 /* T0 = (yb - yd), T1 = (xb - xd) */ 01869 T0 = __SSAT(T0 - U0, 16u); 01870 T1 = __SSAT(T1 - U1, 16u); 01871 01872 /* writing the butterfly processed i0 + fftLen/2 sample */ 01873 /* xb' = (xa-yb-xc+yd) */ 01874 /* yb' = (ya+xb-yc-xd) */ 01875 pSrc16[i2 * 2u] = (S0 >> 1u) - (T1 >> 1u); 01876 pSrc16[(i2 * 2u) + 1u] = (S1 >> 1u) + (T0 >> 1u); 01877 01878 01879 /* writing the butterfly processed i0 + 3fftLen/4 sample */ 01880 /* xd' = (xa+yb-xc-yd) */ 01881 /* yd' = (ya-xb-yc+xd) */ 01882 pSrc16[i3 * 2u] = (S0 >> 1u) + (T1 >> 1u); 01883 pSrc16[(i3 * 2u) + 1u] = (S1 >> 1u) - (T0 >> 1u); 01884 } 01885 /* end of last stage process */ 01886 01887 /* output is in 11.5(q5) format for the 1024 point */ 01888 /* output is in 9.7(q7) format for the 256 point */ 01889 /* output is in 7.9(q9) format for the 64 point */ 01890 /* output is in 5.11(q11) format for the 16 point */ 01891 01892 #endif /* #ifndef ARM_MATH_CM0 */ 01893 01894 } 01895 01896 01897 /* 01898 * @brief In-place bit reversal function. 01899 * @param[in, out] *pSrc points to the in-place buffer of Q15 data type. 01900 * @param[in] fftLen length of the FFT. 01901 * @param[in] bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table 01902 * @param[in] *pBitRevTab points to bit reversal table. 01903 * @return none. 01904 */ 01905 01906 void arm_bitreversal_q15( 01907 q15_t * pSrc16, 01908 uint32_t fftLen, 01909 uint16_t bitRevFactor, 01910 uint16_t * pBitRevTab) 01911 { 01912 q31_t *pSrc = (q31_t *) pSrc16; 01913 q31_t in; 01914 uint32_t fftLenBy2, fftLenBy2p1; 01915 uint32_t i, j; 01916 01917 /* Initializations */ 01918 j = 0u; 01919 fftLenBy2 = fftLen / 2u; 01920 fftLenBy2p1 = (fftLen / 2u) + 1u; 01921 01922 /* Bit Reversal Implementation */ 01923 for (i = 0u; i <= (fftLenBy2 - 2u); i += 2u) 01924 { 01925 if(i < j) 01926 { 01927 /* pSrc[i] <-> pSrc[j]; */ 01928 /* pSrc[i+1u] <-> pSrc[j+1u] */ 01929 in = pSrc[i]; 01930 pSrc[i] = pSrc[j]; 01931 pSrc[j] = in; 01932 01933 /* pSrc[i + fftLenBy2p1] <-> pSrc[j + fftLenBy2p1]; */ 01934 /* pSrc[i + fftLenBy2p1+1u] <-> pSrc[j + fftLenBy2p1+1u] */ 01935 in = pSrc[i + fftLenBy2p1]; 01936 pSrc[i + fftLenBy2p1] = pSrc[j + fftLenBy2p1]; 01937 pSrc[j + fftLenBy2p1] = in; 01938 } 01939 01940 /* pSrc[i+1u] <-> pSrc[j+fftLenBy2]; */ 01941 /* pSrc[i+2] <-> pSrc[j+fftLenBy2+1u] */ 01942 in = pSrc[i + 1u]; 01943 pSrc[i + 1u] = pSrc[j + fftLenBy2]; 01944 pSrc[j + fftLenBy2] = in; 01945 01946 /* Reading the index for the bit reversal */ 01947 j = *pBitRevTab; 01948 01949 /* Updating the bit reversal index depending on the fft length */ 01950 pBitRevTab += bitRevFactor; 01951 } 01952 }