From 6be025168121a1fa11b9e6bd07f7cd77026fb702 Mon Sep 17 00:00:00 2001 From: miaobin Date: Tue, 21 Jan 2020 14:36:34 +0800 Subject: [PATCH] Pre-release the project --- .gitignore | 2 + README.md | 56 + example/favicon.ico | Bin 0 -> 4927 bytes example/index.html | 14 + example/main.js | 71 + example/yes.wav | Bin 0 -> 32044 bytes src/bits.h | 108 ++ src/fftsg.c | 3009 ++++++++++++++++++++++++++++++++++++ src/integral_types.h | 33 + src/main.cc | 96 ++ src/mfcc.cc | 67 + src/mfcc.h | 70 + src/mfcc_dct.cc | 83 + src/mfcc_dct.h | 42 + src/mfcc_mel_filterbank.cc | 205 +++ src/mfcc_mel_filterbank.h | 63 + src/platform.h | 57 + src/spectrogram.cc | 213 +++ src/spectrogram.h | 108 ++ src/types.h | 59 + 20 files changed, 4356 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 example/favicon.ico create mode 100644 example/index.html create mode 100644 example/main.js create mode 100644 example/yes.wav create mode 100644 src/bits.h create mode 100644 src/fftsg.c create mode 100644 src/integral_types.h create mode 100644 src/main.cc create mode 100644 src/mfcc.cc create mode 100644 src/mfcc.h create mode 100644 src/mfcc_dct.cc create mode 100644 src/mfcc_dct.h create mode 100644 src/mfcc_mel_filterbank.cc create mode 100644 src/mfcc_mel_filterbank.h create mode 100644 src/platform.h create mode 100644 src/spectrogram.cc create mode 100644 src/spectrogram.h create mode 100644 src/types.h diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0756ab8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +example/wasm +*.bc \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..c97a9cc --- /dev/null +++ b/README.md @@ -0,0 +1,56 @@ +# Web-MFCC + +Calculate Mel-frequency cepstral coefficients (MFCCs) in the browser from prepared audio or receive live audio input from the microphone using Javascript [Web Audio API](https://github.com/WebAudio/web-audio-api). + +Implement and accelerate Tensorflow 'AudioSpectrogram' and 'Mfcc' operators by compiling the TensorFlow [lite/kernels](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/kernels) use [emscripten](emscripten.org). + + +# Compile the code + +1. Download and install emscripten follow the [instructions](https://emscripten.org/docs/getting_started/downloads.html#installation-instructions). + +2. Compile each .c/.cc file to bitcode: + ``` + emcc -O3 test.cc -o test.bc + ``` +3. Compile all the .bc file to tf_mfcc.bc: + ``` + emcc *.bc -o tf_mfcc.bc + ``` +4. Compile tf_mfcc.bc to WASM: + ``` + mkdir wasm + + emcc -O3 -s WASM=1 -s "EXTRA_EXPORTED_RUNTIME_METHODS=['ccall', 'cwrap']" tf_mfcc.bc -o ./wasm/mfcc.js + ``` + After compile you will get the following files int the wasm folder: + ``` + wasm + ├── mfcc.js + └── mfcc.wasm + ``` + +# Run the example +1. Put the wasm files to the sample folder: + ``` + example + ├── favicon.ico + ├── index.html + ├── main.js + ├── wasm + │   ├── mfcc.js + │   └── mfcc.wasm + └── yes.wav + ``` + +2. Start an http server in the example folder. You can install [http-server](https://github.com/indexzero/http-server) via: + ``` + npm install http-server -g + + http-server + ``` +3. Open up the browser and access this URL: + + http://localhost:8080/ + +4. Click on Play button to see results from console. diff --git a/example/favicon.ico b/example/favicon.ico new file mode 100644 index 0000000000000000000000000000000000000000..dbb5ad735ecbe82ab70c5621ad921ec2385c8c29 GIT binary patch literal 4927 zcmbtY2T)U6w~it$2oVe&MFfNxIw~a;=|wsS2BLuUYEW9}2muk1W4OCr;$JuUVAhJ&t|Apj6`3jm0S1^`Z|P{dCF zz)uPQShE8F6yE~?oJee=sS@=8-O~p;ngGh*M{Y|=Dixvk)iczhCozFp6|S2wthECG zY#Mr+Y8HW0n^|yAVIsVTQc_+O(8T#^GZfb}yQP;{5~s<=#U*_;A%Dv#KMWvaD)V7g zOex_HCbo-(x;J(SObp;8wkr%@MFn;)UVM2|?c!@^x)g8Y=?U^wJ=H+h=H^^^XHLS@ZaHuTmzyH4PkOE_QG3yPiTvs4SDj8B zppTdhmY66DIr}v$dwed)wratjnL=2ituo(3%=N6cx3s4fB9GN_ZN(zK9i>kufhAN; z0wFn)l&QO$?hyo2e6#(}{eIBzdXVq!{RLrKiyCeS;(TxM9mNBMWozr^pt8qkUN)YO z*ENiUG;PUAe46kmQW>J+sPP1i&P9u*^j}+z4^QMSysM^#AXH)+=7Tnuiki8V&VOOX zH~_W|ou+uDeP(fCVd1t^$%*xjGeM+3Bf9_h%&*E%h*pYvmF@EWM-QtDpSQJ1zp^!R zcLF9p&BBuV%N%pZY(JNYBJ#c#;l^w!bMcBR?qw=9kNhY$ z4Yer?Cw^JThu^(im4@i@k+iXvYfR6cppG^XuY5nXG+R`q^M`=ip`P48t3RhgeMo5p5+grAM=$E1v;Y|{i{Hq}UPqkPlNVOrZ^OEn%}Bo|SOYg6%Mnrq4ceixWy zo=eB$rs_V9-18gzxoe=dGnzW~CC<>*wW;1qJIt>8cL5U5l+m!kH_+1g$I$W#cd1b1%St^BDN;1}n&#@?||Joa{m|X?XfOu{FrAau$S@bS(*a~(KQdAWx^2eFgha9X6@}=m(cN|8-z|8BBg<4KQ)R(a?}j58^p2AC7N z;4sp6U6CAQahIA`lm_X$&`x9n#8wm`&sqg7IXcS0V-MtwOH zKEt_syV&4F*s5yeU`$5AbdFl_s>f%{Gq$1k#E-j-IY|?3*6yeIJoula9!f@EnM-wU zz~<{3{*rgiH@O~?G|i-jr&{f9a);ZrT-|{30^ZCeJvNqL0h!>FqCGtQv|uq>aQS2g zTr=0gFpU=ki*n53Q@~fIaj>NAN)3Wgd52|?838c{E0#OJ@ov9;Eh`~c*XJf??zdi` z&};)(+-4hlx@fKchxz!p;m)>zz2f8knr8L90_t4%VZGG8QHat&#m#c_XrC1^1QeO| zDXsbM_0?N^-fZ*B@gp&Kcm4QDnzs?94&`>Um?6}##3IAs5le{OI5Iom%^G5;(Njr& zw!G@-PXAu@sovNt6Fh?VdM;96Z}E0Hc#XMOdcc?*xT51cCOb-qD^y2?-)+xwYNACd z)1;6h?u_Uh`35Mouf0C5yNhd*f4pvjS7)pGIA3vDC1X!oc1!0rw$Scrrcn@MZp$=c zFE077Rx6&RLEAl11Y)KMYNPOVDg(v;XRVzKhN^B~AL|@yzAS6!zUIVxILSiqdk~b* z>?s=x=_C7qB`k%Y^nJI%Mo#ig9e*W}Uq|9h7eJ=@6{D$6YZX#6r! z-zV<1-{EHmNo+HJz}}|Ys&7Ktp0RLvn!fhKGVuPqcTv~$A2dx(Z}gT^jNZPVjY`fu zR#rlaMQ3uSSMs?I+7-8~YSafb_xdtL830Az(hRr%#ycqz$Cr3OgETszz}T_Gehewn zL11RQxO)@T;Yq%H9Wg4-e&F4uBI|7_Uj|#`LjeuVGd&d&_amXZ6%lgPD#&rwTr~nDSE3U5B>>XDmnzmr1>_U4q)X z@8I_2p5=Qcn5;Qo@cK?C2Qph}lZ&~0uHl;98A-=>$i4)Vaz6c|H2g>VZ_~pvas?{7 z-?5#XD=MT8ENt{B0XgZ_ZcI<^ zNs$bgZjqJkie-)5d9gGFdN#OYgR^$ryp(Q`UF=>k|0j>XjQdrB?Ozd=e65L-jTAlk zxSJSL_7~40?d)@69fKb@paihgS!pK&TXpal_3kKhjIA1DKXz_0&c{aomK3UPbP-0w zE)GpoX7C5zHG*Z%A|MF^gV=A0sr&O{AcW_0n2H+w+`gro8W3WT*Mh#cp$N3O+ zyIQn#N%gO2^*U0cWMyPr@)0LChLvCV!0 zKzn%~F#+`fa+P->SS4j!Gcze}-uig&lJaYdr6)r9C8K|SpsvR>a7D=eB%=rf`wkPn zHzPmFY%8lb?b)+fZ7Q`23Y+#A!^|)v#y&_f@cx!~v`=rBtNNKtR_PTBiaJV(; z#U-SS^pVM6sZ=gVJD#_)ge+Lii!xV=8^ifnmF0iLiKv^)7syp48lS%=x@3!K zZ*IuKQrvw8do1a6R5v|6#0HPK#e-%uEn6w!Zl)bV3Z8A}hB6MFK9PCGKf2e}nbO$; z$u>3ykQZN&W{+2LE7UU%=Pv^<6`$cnk_}~qqz=OTyXT+BooI_3I3y<)A>(T@w6a=6 zY46|~manEU|G1asGcr{dO!soN;YZ@cQoOt>*t=c)m5V)6RE?Cc?O~^lT)yv-?x7Ug zC-gzz8G9!HuM5st@^|R;=cU7>K^9ezTfh}lJi-vFx@$)Gsaw7&o-q9)BYfzz7Bs(5 zUG8xW8~!B4XzE$X0{G9=QP@IUqHi&hmC5DAGUwimxL6&{VvB(7vFDfFZLT&<7IV~d zR>=$4(7^(vsjLD|s6wE=Q~|-aJ8pH{ciQHFmyTVX+FST|=LLA8gFhF4sprvz*4=nL zXmB*#dMU-(ZG9BX4z2ZXpxqa{+>ICAp9G2+LKDpNJ8A`~)gDC|kf;vW&lGPSpf|#r zfQ zr#y+Ual!y~ebc~d2d>e3y$;n`2Wpky2dwyYW*UVlCKO|~_*2QJ@x+Vv@4gO1Nf%BU_0)(M7ZO8#}EyBQs9NSu)p zeJedA@@CE;-hYxV;`dN6QgHA+1aXN#?bd^y{uxwJ%`QEi@jmMoqyeE?>YOK#$lj63ile# z+@87&5)!3qaQa6s95hQB#UB(~-qXT1%CZu z;J8C%5NRupV{M8(cdUz=iaFhA3*{n-$Iv&PC@-4Fy(JVlt2!Koj~;E=_Ob-La&MU! zQIFU@T}pGcf=nyqGlIU8cSi$86o!je@9fBhH^!wB2*!G4z`9=*bIs3f&F`vzkAw*S z&t~`kvs={JrJQ}uXS6lSbEp1c686=4!j%L+=(gxGE&lV;?mOMGEx@Oa*B85 s6lJBLQc{XiQqlI;a{rCs^~}-D8TG#t_LYkGs04tXmXT(ay4}nF0)1dqL;wH) literal 0 HcmV?d00001 diff --git a/example/index.html b/example/index.html new file mode 100644 index 0000000..f751aa2 --- /dev/null +++ b/example/index.html @@ -0,0 +1,14 @@ + + + + + + + + + + + + diff --git a/example/main.js b/example/main.js new file mode 100644 index 0000000..5d2a99d --- /dev/null +++ b/example/main.js @@ -0,0 +1,71 @@ +const audioContext = new AudioContext(); +const audioElement = document.getElementById("audio"); + +let runtime = false; + +Module.onRuntimeInitialized = function() { + runtime = true; + console.log('WASM Runtime Ready.'); +}; + +async function startAnalyse() { + if(audioContext.state != "running") { + audioContext.resume().then(function() { + console.log('audioContext resumed.') + }); + } + audioElement.play(); + + if(runtime) { + let pcm = await getAudioPCMData(audioElement); + // windowSize = sampleRate * windowSize_ms / 1000 + // windowStride = sampleRate * windowStride_ms / 1000 + let mfccs = getAudioMfccs(pcm, audioContext.sampleRate, 1323, 882); + console.log("mfccs value:", mfccs); + } else { + console.log('WASM Runtime ERROR!'); + } +} + +async function getAudioPCMData(audio) { + let request = new Request(audio.src); + let response = await fetch(request); + let audioFileData = await response.arrayBuffer(); + let audioDecodeData = await audioContext.decodeAudioData(audioFileData); + let audioPCMData = audioDecodeData.getChannelData(0); + + return audioPCMData; +} + +function getAudioMfccs(pcm, sampleRate, + windowSize, windowStride, + upperFrequencyLimit = 4000, + lowerFrequencyLimit = 20, + filterbankChannelCount = 40, + dctCoefficientCount = 13) { + let pcmPtr = Module._malloc(8 * pcm.length); + let lenPtr = Module._malloc(4); + + for(let i=0; i> 2]; + let audioMfccs = [mfccsLen]; + + for(let i=0; i> 3) + i]; + } + + Module._free(pcmPtr, lenPtr, mfccsPtr); + + return audioMfccs; +} diff --git a/example/yes.wav b/example/yes.wav new file mode 100644 index 0000000000000000000000000000000000000000..8efd2f2646fa6d08fb9ee5cfb96925df68821925 GIT binary patch literal 32044 zcmW)H1$5iU_w|fe=Gb9snliWB-Qusz%*@P~Ei*H7%8XlPW@e@|%udV}&;0d$N5^n< zY-u#}=E1%9b#2qKW$AeU^lH+x#mEW&CfEP~P(Kzn)W7KG319=mhx|L_|9u~ft6>p+ zKzS$;_rcw8OFR>=!YlDeTo%XR*0?)vfa~H}cmp1wpFe^3;tRMK$AFq(8aM=wfhAxI z$OGkI1bhizfU>Xzd<;H-V*Q2&$ih&l0YN{0gGB)0_xK^6gRNLbW_%viMnUbjc1D|| zb=Dea6}5ORQqxqYTBI7a_WIcq8jt#;Wb_7kP(?gNAJb>tOTVKVjD#29cUYR}Ma(8v z5sQg^L>>`Ml4O7|l6LYo@gJcOAtWT95KD>XL^M&BXhk$7;)!q~4UUJ^V3a<#8~S)| z<8*uw-$gsLXl<;vSxZsVl&(rOrJ)j`%#&Y9-=xO!Ly3{=vMKXlyK#ip;(Y+XrS;1E23c0=f3H~Hs-QYBIF*=MN z3~vlAjR%cYj5`g{2ANOe>++AdByIwGo*qv9ONmrdswJ@s+{8EZ)nL(CwLo4Y9TztU zJ%gtL#euLuW8Y`bdQT^B6K``*xSMgi+zwB9PrAqKEAsX7AM{5D#s-UorV=Ully@rw zwb$qdJ_%BwpGYBRQJd)?^M$R?9pJk2U-?AC1jBH{7JW=p4V2*#|C7(*-|BMPyv;|LR<<1yaAO&r?e*8eN|H~Dyx)6N>!z)60W?J z-^)s|==4#6vMBX}RSg;QXAVms-eztQvP`t&+_3v-tBvMFplHw~5JtmLSd&}3{v!gh?;( z(gd^uuL3c|C*m2wQ;Qgm3$p836W5JzZrE)w8TS~687mo&8*_|##xADDrjLddd^Ts| zXYxn6&g_58Kqi^qN_{7( zik-ym!nol3fFW2n_)qY7uy?Raph93`pjR+k=q$Drc>PzS#21oF{wUj&)5;XpioReO zWWY7V6JiNzq8~6K+m>s}9pm=#qM?Fmk!g_Wq{(M`Zu;BY(sJKY({jd~YaVRrZP{bq zV}5ULU=B60#y-Y#M%LKO(2K9h&Z2KI2leq!ByWN{U;}&s>Yx|uZq*>S2&VfN2L=WH z_4~a0T%d?7T3eh^{K9#mAf;e}^Jn4Cg6_`2u3Xo{q6$U-x_EDGU$L*ZZm;KpuLLYP zm73ZTvtD(j7BH1VSF}!*>Ku;%9L&zZ<=bFY`$daYfZ3LwES&uVGgxa zwTv^jF*P?a(=a>Cs0(9V*e?s7WaF4J;crJwM_F7trQ;%p> z@Cfi54krIlG0flWGp>k#ZOAfKFxR%cw2ZJu+3wo9+NN4_tyIWWTfXJCInfej-C#Lj z+HbsK>~9P)*mx^{ihID0V81czsdsP#agc6K7h;Ekl?QkkPLu12Z^b6!lfXOQBwueI z@X_8TuBnCX3OW{zcAay!%_H+j=ZPX;!Ki}&ipP6uySf$aa{cth1>=H*Fip%B8_3(W z4q!6)ANUTBkWa~q)EIWKah-8C*Pe?q6%AQ)5lHg%b+`9a_CD~R@V|1G_7r$6-swJLpq=lf|BA5G-^uqxs4dI#5M_lV%h$j_ z;u;K)(ex*JBUgjJ&X+PTvqYLlna5c6ShbKpAz!V$ZKtiKwa~KPW(-NT{%hW2I%h66 z19NMWYRoc3@|(ET{4u@*w~9?+I#YAucA_TRogD@$DbLj+P#^wOD@e)GZ)tka@B89& z_@DUS_y)S-3MUkHc13&Bilg&qVcg=8Kcb<3G_Srp|MOm(I{?uRTd37PBt$$Pd1L_ z*BH*5-kM?zg-i_dfmy;-q%){>RFF6d_M=XS&?D;)t)H?%k2LS(rpgATkNi;lC~OWI zge0kk?2zUO-2$3FR;1mu7T1s1LTbi2xGXCMN zGTZ3gjFlV5$Z)ZSmFL=YJO`cDqsIyHoG?@%#NE;(Iab;lxaY4ZoRogZ&EzHGDPg4$ z9;`3y(MV8FSrMG+?=6%7m&o1fFTp9)7Q1UV@F}^BG8y~j-@%viX|#fLQ(Khx!C7b; z^_4!&6cIb0*)3?Q7!fwQb!@m5hgsBh3@c z%Z;~8m2DNR!}v*zi|=SHv}GEduz@PUdPb*d0lC?q>GH}?$gk>X_lCmfuIs)6=k|<_ zS;k_&>r2l0pXuL{Gxxiy7oE)LmQs-3(^=nhwPNw=+ zgC^{_8#+s0&BeCPp)W$qIL&t$mj=;9y z6*{aKmCI_T+CZ5i-0;=)R}lJ%cXY3FE%45NI@nIyrnFRx6|1^K{iI&f=7R=QC+0G> zgP21epck;|>`i(qbCYn1ctZJGB;g%$MKub9>4+sxd(U;vi@XDTtcEIZQhlCNTZ=@=XJ zDJDN+ymgzYMz~bwdxhUoF18gnJp4jjbl7D+hudZP!tKGFG6oH&`V%F^RjwV*Qtm}! zZh# zK6GD5iKyXmkL@#=|1m8s8|`0)3q)-1+#IahPSLaVfK%n$1QTUYk#Zb&N@jm|=WP z^knAPA4J^=uWYGH@4@phLp|f~8bYY6(gg2nPnke}ISTbxCwm%Xj7VGVUW-~N)12w4 zRkQMgtI@0=;~DM!DlLH@wW;zQ@*01gOECOmlj$?2>vp?iVQBe~zQ$BbZYT^N8~(<= z&X{0qZ@y*vXicz;BkRFf-fZkhS4Yp){Y<{84B21a1VqDT^HdhX19W3+FI#QvcSoE> z@cFYIYR}7FsWe6)Z??U)Ox8^>{QZwzmzigWi(&&0GHU#|kyCHzIJ{8CE>X zmFHd%{3>p9m(A$&qiX8%;!WZ-F)r}N6X&vf9!TZc1&#sXnU)&NacYw(KB{Fx>G&&= zl^y-eR}C-BOG2+k_IA`~{!yxmEtJD}2yBA$#h$LgdE*O~ctgY|0^@JxE??ZrwZxyM zhS61+ih5jaE+zyoqvw|0_T&7^;H#|CuEU0jVFOL|gdW*nivI;4@c%F%PAjdXc=>9e zi@vJ0`9Fp!#yfDomdf0;?1~7+?+vSMsA-tUzc!bRCS&f{TN!QC2=XA?i>=6Ywlp%` z#lyu!xfy`$f9wl-n9&)+bI!tbpIfD!v*cF?k$peL9E`Y>EXUL=m6(=R{o2Y$T?%f1 zKIoKe#|-}L|5iB0^#$*9_~ZXBQ5gP_s_NbDA7!unPinnY(c63_asz=;hQT4Z#%W|D z;d9QppUZxu!X#Nz+WUv*&-hd4=kAQ>!Zve^V**{mSLEywyup8o70X1FoDgz_3=)jR z8#BCAK|+Oy<@`^zD2S!z)MfJv!vNWu*E4l*)?v>hr4RlsH}&kwf1cakbw@K9=9zD@ zF}R}A0DWLKhAfUc8I^CF&%6M&fs4Ls+2)9`G9bmXw)nGaqkDtDwLDi2^N!EB{&m8i zu7Tfr#(rI>oqwp@jTzJuHp})qa(vW7TRHjzXsFjA<;hU`6#a}4 zw4cE;o(yj(c`i_JoYdN{`Of*41V@4^e3GFt%@Wg@4z_dQH6vqV`XvyhJCxp#*rfy! z))#E^H>T4|yomeTd;WH-^VNT6+Qvqf@>G1X;oibue>}^BYlIBBzjjv*%3akhAdOte zj|hK|uqWC~|8!kdIf42+NCYPei!AJfwF-g~Nx<;5lLviUc2`xb0<&y*`OUqY%! zMBB=d4pf@!9NVpuT&`Pq6{Z^Q0<+n52Ef^%BhWK{X=ZqqoV%uAQQ=;vj3E5jSwWQqI?`fXV5XU(RzP z7$5xMj&}Nsi^cWCV9sp1Y3Xi#WIk-VXK8KUX#LFmMO|WQQP)yCXZ-ytuud=?DBiZ4t|a+z2NlpM<}(k2Za#e(59mWlN4+ zWr+{|n;w@|-aQr1hjryao@Rxn{M}9=IGLPn`er#~nPr=2+h!`w{bM*0b~0gc++O1= zv4nf0XL;b1|Bm}aQSXA}oW+^WjN+Vc#fQaqWL>>RIZ3x6DshX$K*@hgmoF(rW!t`T z3Di-l220RRwUM_&!I_-DGk>MYnS-4J{bd4sJ(E4}g!#lccGh90E;@bxYJT+k}7ICpDaX70Z1M`;(n$A9mTHOli9i*;clxcG0 z&$tQE!^00mP7m#ESzz2nt%1ozG1(dtQf(KTzuq-hJ_B=@8ng?RAqTR%%`x`+p$#2X zL(W+fOnunH#96!^Ze+i(mrz;%{o+O*kKjbHq(XRL8uZbeC1{@-dM3O_^tOcmmA1ze zGMD^?MRUE&q<;b}GgiDi`y%Z9{NFFK-sUVX;N3d|ZPYfz-*j!(Yl2~sam6LtL~z_- z^{jM28xBvP)&lgjE8LXJBG?j(MXSr>{eD+~d~ zLA8KkT6%DvyH8>1yyiJovmN=nypxo2xHTG#ju0e2&N9+IIm{D!&DOy{>U;PMsH~M0 zZhOwT#uNvgTM9borxmO%PV_IAQqWRz2b0KeHTgrng>?;Y9kx6yCv23xzp)D?V+wUp zr;9E8JM5Jt~t{^tyU0VW<7fP4*Fs*$6{6pT{M$ zkYZsZHlkRx7tPTg$v1_1!CgW%@vGnvx5#bLK(LTlKrWy|`7~1t$Ir;a(U)SX#SRU( znDTK8Uv5!E@sGlunT~Ib-=}}9lM>7exGmBqFp}EKZL$n=Si^URH;Q-`RWVu%9b@={ z)(X%3l>(Rj(caP?w>#iI;eF;eh&L1iK1lszTxa!IFYBnl08^+r(R7&`Lft|G#JAo# z#m$^I3w?zji)y*AdD{8j1bAgCT*90*d@!{zN15B2&Kc@+-KbW0yJ8m8{Y`u!o-3{< zZo<3SUs`A-4?-QuN8Ec;1M3LeFH0@+D{~*~E9*W}3$_w@3m;cI%N4~gVmbMU!fD^N z)@UZKMT}+E7!H{JGQBpW@ICpn+++3~y%09nCdwhAJqUdZi`M0ha#rvj^9N+Bk|6%U z-MIONyVmRW+s5{WbB6IuRpuwP63r8u1!e?7gciy#rMC1;E|2$;kiTtQX4-H1Y{JH; z+&bz9jzNdfchpj=BR>t6^w;#2^DhWsCLX*rVjlGRFYc+ zx_HZZ_Il!ckNmb^TXBedL_Lk4z!Ov({;_$Pb*%XSpT?}E({)U27pRFUs#B%!LQAov zSVl^dTWIUy4tf%I(6GT=-sT9|WuvUa&9d=`;U;&6z79`mDRQ>x7M6-L#jD~0`H=bv zRR+<5t}yJ#Z|UOl_eik!R=%v|gbyr^&y`R^(J> zC0C0p!9Ai~)I4T3+lAds)r4zx6ng{ii=S(iwPNj)`b&F=zUn!DQ#h1tPxoM_aGkhI z>?&p?6VFVdYf$mftz{|iDwpX7U1Anl8yH`koZ1Dd4^~R^6#QmG%hJ{7LS^MRT0%T&ck~=sFd{r&xYF0->Mm zX6pf?k2}J!^jI<%uaS2La=mXn*j>uA$a~j+Q=EgcDT&WBhM5K$CBrS#RZDftKgKz1 z6Y7GV(XE$Ykn>IS_Z03*T@+r;)-Gd~I8ATh&zVB>d^py6!nBf4q|bqG+GzEeoFsJ= zb)ZBzAj}b0OV^cr{DiE|+6+$&Z45Jc4_ls@OuZ+{znuL8>wAO#Oye(S0ypKW?HJ z?A3C$4|u0KM0u>91V+ZmtuY=nx3i2e|1qvJ*x8!)8he*GzvoZNU7kBQuYX?s;<}O%{!1mZ^Gp-1Ybe`=#@$dooA2?lewWxZKACrP8IV;#0NiO~@Chx|;sCmolD%XO7+O0k-xUDM8^KX@dZL$0B_F($e$fw%;+%0@qZZ_7_M*xYoliBtj`^+^GZqy zrG)l_%%d-GLoIL3;pU6{ad=e|)igQ7eJOpzufNkyWcA2R${U!s-_=^J#8lzyaN*oj zzOUf{y@t3-CcvZOEzc~^OkXQ+cu~Flk%f(XbI~8BvjOoJ4X-T`)>X#GY!3Y&^Ii9K z`*bhA6i<-9iU-8WqD>x&EN}?1h;GW4GlX-po};Tw1lNpT#Kq{i$|9mOxT*FMn~7iK z;o21RSjWLPqhs1{{a*#dXq~s&!W?2qI+NT5KWO`uy2=5yoi7;TFw}h9eN!%OLSMwxmzV(xJsihJZP0a&6@e9;n z+pH$YXO$aDP=D_%KT=Jw3bTt(detZ;6 z)GEqNr3!L&H3JobgYX&gimJ+<;uiBKx%KQCt{gvuce99UOI4#<(%&eGNCA^^F3Q6s zo}kUq{zA?1EnFGZL>Xuao=&tQ1>JLzzPOgwU_47K54QRsn_=oqCUJ!)S#x&0cJV7Pxtv9r~||=5C`gk z_uw;F4p)N>C_zh6Dr%}4P>b}Zc#I10bkGnq!3k)zmZL4vnrSQ4Ukam6(5&DxE<&qN zeUy%}&}N*5C{zVs!PmhP{faychd04qfI&BK4Nuc8x*ckuMd>pOCc_m(H*y-apPocNr2Dh?*tN_``Znb!kCM&E>O>9L z9fa!)^jy#z*F}xdZS96?Ruk2lS`w1+zi=$kllY}`h?78XojGm^KH|~1Dmthta%*X+ z;0zi>MXVva6pseLUD%8KM7^bNGyS*>oki-zmZGbZ3&Be*M@?00>kPH5zSiEOG@J#V z!;4Ua(d2)mTaULVbmsXPv71;24}d!O5XwP!a2?PA>;@&^8rTy;q7QKeo`csQ0{uZ9 z&=;3QrL`?O8&ysnp}tmcXuohYv6XB^d8kM|OS{GxnepUTFcWvtt>Y_tt>dtJbT;sh zoGY!BItXz>1<|5o-Dl*j$_6z-XVXuieemE8k(03W33Nj&YRC=q; zvK41aA#*M&Y@E^QMM$t|5`2lnhS%Q2^I3Wh@aXXZ*jnGbOQ#1*2M&<~}j zd`)~8{71K_9l>70d2y@0A0he|cLyedC6|)l$X4`F`WLl}dPJOnqhJg;fa~k1Ol{B{ zChFI@1oOdZuo1q;dvGaTTd@<&h5O)9KoNUk3jCL>Oy-ck$x37~QJ;u_k)R~#gvX#2 zXtUN;TdF^E5H-NHz*M-ISWOtHSo#Thin>DA_&tG5WxM^w(4lK5z*Qr zFbWuvAAW^8--ws$p00#;0QXSetD06vovkic$Em|q2l}JO{e@r#j)4*6Z#acGNo*jh z!5hRv@;0nO`~jh01)8SceYg5Uk4IJ22GWN9z$@bCMJAHn{<~h|dd1Yf4m>zo7fn2(^xC zq;G;n>S*c;RbHtIzER2QA9Vw1#*L*Da0?hCKf@nj8;#KFk{$7N?H%y~{D-TV0f$i^*#+jo z{!%)yk)*mD=df4MQsoH$O6w?XWM-kCfij{rfLIRftaRsgb&~Xuo>r)cR;ol zN__zxkp$O5qt*bY!C|N;4ktS4S>{6GGUln5U>UqeH9)@!1GyCYzzHf&-`ipAHLX1= z&$QAbrAFRCiaMCw1NSQ=Ift#~cp8G949$QTiUOdnFGS#qOJ{W%Evl4r66|CLe;T($G2 zCstFWw93(`Y^k(qmVC#H^tUi3c9+u0JGs}inUPb9P8Q9M z*{Y-2>%xChO>#@}`Sy~9GnBcp({xqK^XR2uTK;cG?GQ&sQ$t4N;H)^US!{oIitB7_ zD}~Je6gr!U&40kvwgcw~%@9!{kmIGonvhlV)2t?QR@M-@spChXAf|*561bw7j`j5B z+|$MjA?*utwfm8ZZ$fdsn780S?kh)A^Y9#9d=!~jFvou-W=R0%*9*T)jmXF}BipW= zo@jQ&T5lis+sM;ea{g^od8485s1gbw-$k9wQ6$kNsJDQ7$w8XPcYBt) z#e3U0hAVP4gHJm!d1r!{JChN@f<&|__Nm5{!!}eCLD{%5rs}ej_JO~ zs!g(=Qx^KaTNlz-eG4hxG$dGBJZMVMIs{HI4Om0)file0Ub*7yX{=Az@MVw_%~OOh zQ8rJ+cReearFmt57?&8X2UiVMMMTWjCb;9vGSn!!DfAJ6;T@o0zjy|mPC zG!A0z?rUIK$Zu~Qf9sGkc(t<|2h1}IAF0_P{{?3FUl|9`!#%gaE6ZLXNho1VhYLI{ ziH|0ycbe4DdR5uvZOeb7pLSpLxSht)g_F z9RO!b->I_jZ)GoB1eT#QI1Xmv9cZJj6g-BqfkI^9H`;%CCTLJnAwyMHR%jA^4%bmD zgNF17lqBUcBdO;?5>Deqbx<&1$RM)a%Sj*W5sYF6FT$p7z}4X%`kG(^%V=YyP$~zO zlBS~!{0;t4DyogC=1Lo75IGzsOVhw{;+wn_mm!COTH0_x6I)R$Wi@bq94KKT$1e1NkA6&nr=& zDyX3qX;08luv(ptZWEWZHd-Zcl(?rfQG#>??kue&`q3}ssY)0AIc_QZL)K$e<%BTY z#1Ut`y_scvd-0Lf(tHg6&tH?C!^p~5DS@v=XZinvtBjfIS)s1cMxPg)5b=Z1WAQw_ zoQ;z%sAmneuuqsz&F5)>$4yM1q=7g|kM3shQ@czE;2XZGHh^WRq~IM=VY&vd6BXfD zz4ok$Ce!Pbk$4KcNpF<*$pvOo9w^Y}Z!pC@mi6;j+((I8#%kg=sgkjg<`NmUly(}G zfM3u=@(?PgZl=3}E$VbofoY{zH)HwzxQcj|-cQ9T_fda3f*2wdlU}BqtY};5{qU|F z3;Xk9#Z%g7zL89#pQt1)s;Oc#!*S&j+J^Si@kH0aQobtjKpcyJa;P8k11yqXs1pp+l&e7--IV?6e;{K$*R7ulx z;k>Zb$Z+?))imfRA(r>uv7V(l_YmB~G2j2u-^m7W17B%!h}jTigGbBmDQKJa2`nh~UcMG_lDa3WLx|6|Iq$48BkG;BqhOasSEo5I zNQ)v{DN6obTQ2v*DXMeAH;7wYr|hk$pYCqBzl~C4&vff@Hrdq-UA4>d24{IYYxL)g zCnrTZy(s@-tc{tLTFbU4d}r<_Z`p(s%D_y?(bo}^nI>E;73s(6qvFy`zkf|(R+s#e zFBc9kl>}{n)`cvJ?3evUSdd_pnB1`N)wa($PgQGdsy`$@KC-K!ab{Dxe9Uj>Ja7 z5`-9jCma{tk1m`36Q4?z4PQXlKr+lV@Io{BiQy0t={4vz>>Kg9yoz7Vy(m1zH#Nri z!iDMfg`jlK%*efrKW72;G^Cbyr-zMCQ|4uDv34`v%o&X8#iTn|yRJlB!&=TMjtd); zdsrA$qKaoh{*Q=@)R=5D(>0WI_0VBO|9MwM*T>gVv%~+kD4nk&v~+L+pykBm>vIvZ$N>Y)b}M8x>mBUwGT zu!up0oV+qRO^7ZWU|(c%<|I;QBfAv7_f?9!D!9)?z=?`AzWPqilIr)idmYZ<_{G0V}6=n2DeI^OttV$??-+NThbjzVnc1ueRZVym-t<{ zU|o)n&WGQ_v%>645(# zkNsmjS--}bY8ODX4YVym_Jz^#Idi!wVxpRn#Gf6J5CfYaoy9bv?jLmVp<< z1#_6I3#?<=SlG66K+Gu1ei-QrB3PFbH7w^zQ0IOVCur)|aDr<}q3L2IAl26DO3 z1M0d0(sq__Rq%#*>S*Yt13er+@wVc=+!IrgtAp0Xu~e++9_`pjlqzUxePzhZ|4tTJ z8x>De+uP`X6fjtJM=7uvyT z_x6n*Bj3$!>xeLp%1L6Tgia}}CVdM(FBZCC=zZvLcjgY6Cj0J5i^Gk9C%$?1qhO2s zC9~0d(*04hIO-|$U7ajFjmCTm)Qa5g-B##|EW@q&!%>P7ldB2KhCEmn`Qu;6f zPw+|Rxu_ea3cm9s>1IG55wP-E+ocB!8;;8$4NT!vRqg#8!3J-*b8J+3P z$)uP+p5lTQp|O^qnH%x!n7@R3xvwp~L;Gi*6JN&n^Z&@YX5VI6n>|+<9oUVOv99wJiKk7Kxhcg<(Du;D z%C(}qra{(?g$<-WF(uXW&ZU;?rhsRVI5p%tjP@qMCdT$?lDi!@(>%(T8~oe$fSlk= zral^j(x5;ydOVvR=&S5u7SIE{>!d5TP%X-p#y2uH@KhF$+A?u^@o~B@|5A$fmFJop zLfoU(fQ{5Vh5vB>S?0N}1e0uEKqYsOIB61;p8jsE)zl&wA8>dgy!MU6>y1G*(!G!2EPwn-fuXios#L*WWZp&f@yYm#>7&k{{brhwi?vD0HaLxJqbc$VGQfrf zN~1bvpAsDmr*|9to(9?!YghGauqj2F9t4s^w|Odf;qMC{@%QE5QV}zmIHh&h<`Z_Z zy>cB}!8kBNd#|;or)XDz3sl0@zyc5g>nPQzS#W`L9G;_Apf1WB~w?$Wh`>7c*4KwA-f&8D&yg?Vzk=_$rZuLFf8oDM`7dtYn7b zzRFjW$NZ94i=Egi_<;N#wIq+?Ey_4>o3Oz5;tVi_rRBNWcDgR~DZjLC)JgnPNB0-# zJvOQGYtWyFh9t6~kyI4AtZo3msc9OaROg0hsp<==J(SQf^o6*G8o}+z1&-2*(p9*Y z8LVB^5c!4}F1{u9(Y@rmumx3B?>(4GE+mn1fILZlP~t!uJWg~{qi_a`(F$b_J(3)+ zb;VnWn!0AcEdGw>QOChTNrTtf<;o{%z?wW31U6D4<8LdFsM8+0IhWJ6nw;-SGxKG>^HVBxJfR{SD>-C7f3KKk;Vm| z(rwvDskdz9))S;~6a0^>taX&1F-^&(@*^US>Wp1#G+hgBQq~Y_h!fbOwjq76uCfxI zApZe%)Kl;e)j-jaZl)xeA`FBgn1NvRFiQB80Q`6PLm`7V}KhH@9-x1fZ>`0sMr;CW^a*+J@ochbX&&+q(sB2r? zaJJSI{)9W!j;Mvs89x#&)F^V0*dOR4LM}C^vnl(ORe;3p;UmJYtwsbrRr?}uq5dIH zNWIk{DRMJ|G~8zaw>JoMJe!oP}5WWWww5K{V^9lV1 z$#@YcNmkNg&`_d1c~H?ALOKQKtG_X3?!Xx7FIbP;gO&-Msc1uzJV30*O=sW9x8#fb zSIQ#ftDo83WSmfk7{w=PeYFbY6yhwb3>v96fS3Ua7RK`Vv@9T%Vt>KgQYEc2)l%2O z=O}%s>O^-nSs~~v6HRddQM`mdqeeEB7Q9g$Q0(Jxl|G*+#M{*nquB&D8Oz zh6a<;O&!ZDl`PQC1;qUlYa)oc-o7;EH^{Uk5O9qU>OR>;ZH3&p4b>A3kY|BJx(1pS-@S>L7502Kn-9+ zgYjJKhLYx2i>XSWt$Y@2rh02{Do+K2+trF>Z*6ilmr%Zp4bOlpaVL4>_7x;LiJMH;`US@B15?gDo{0W2ebp#CsVPi zwgsPwDY!pcM>Ntql`*V<-{54u!+N>SLZ8qZx|^e9P>zZL+qB(aEo=<_1`IxlRCoZ-HwGWw$o+t}}7jMyM+?aTcdZ>bqhjvy|Ks@&7>`y;DN;$7{d`;vVz)IXvdZJ?D z81krpbk?VbDuaKh`buea8nXw7Nae`;Of6}$a+jNfJ;6B)O}!WW-~@eH*D0_pgJYB- zbX8)s(hrxX_vy;3rFy?%YxRGi6f+8cQ~Oih$e+q8e3v%~ zxgY$kwby5Jv6hJ&P|;|=_L3L@M`_z(9debH0XM@$wKx1nS0G1dm$(^Vhq#`6%fu_P zx{CRq9%tVYgQ+oUbz&{ES9z%});kA#1U=*xeu*$n77TNML-<9eF-?>RG@jmwhiZF> zx$w1m9<-ruXwB3sWLeUr9zdydHQle4Ci)XIkpVS@Wxx$>GMI=;=-g!<>V?BVM_3Nc z#VKedP5~=5quxun8{bqJ%u-KLOS!eKt@)trkt$J;7$v6SWU7YxL^(j*#CMeD;4my! z9;!(sjn2u}iH^i=`J48eVYIz+2z4FDDs#b0Vvo8HFCVpCqVw`30!K%+~fIf@w{BR9BO0snO^den+eYcH9Y8Czipt*gQ=`!gy<4+B`ig&(U15ms)mu}S@hW*5 zkw~ppR%`p|6qqji!D+HGTCDB_n_#|j7P-liXp(w^*r-RUvf5SZI=U!t16#>~xQRMT z@5J7rC8<-OjZ~HOYD+2sjFqe)nJ$g4DNl)UdKSQ|^>m~>TIzrgkqy*Jas_G&c}%*j zszijqz3N}+Dx^WUa$jvgE8_o%-N>t) z2TRFJ?Tk8(JVYK+dm;xl7(7gF`9}K$+AQussnu#3%ck`b6Ikq zwijoUrEvx_P+iD#x*zUFO@QJ0Z?2R7*V3K9-Bf-513$+svy>r3M3kuvB{Yzs%$XC? zq*M~$3}q-vWlWSNp+RVnxeTGANU2DMB$-k|#>#Wf|Gn?;|GKaH+#Q2NsJ(I0j6>{`9j>wh{%dt8dDQ7I3BU`%m+;mvnqoV&^_J`B*=Ssy} z;uo?mNovREM>iz5Mwi8nqIK!*QKe{W`eWMH-2*O)@3I1;Z~SgrE9sk_O;06_)2>nb zbY3z)`aW8c)iJFU=eDY#e>6B+b?$I7Fg`-jpO0RPKFg|_>`D)%mt;-JY8*vrHmj_< zN5``+Klfp}B;9iE@uW=LDsle>_lTI9m1H%faxJocO2R7%UG4XrV}DSbJ4FL^HQ zm^Mz9B^>@}z z(ebo^@#HH$-v{Rb(e89xG&}AamG#@}QP#Cy9VONyJs%y9s%2{!uSti-Jt#(}=(l)L zTsoQ`mCV*6zRRk`_v1;?=h3tAKT%ftSae&#Jqk(E;}=*_?hQf0~+JmKJoLIIV8A z%V%lTbYQwOU7rqecavr5g!FJy))6Dz3*@|XZ~D2lXLH#0O?N|?D=S?h{^yFyS;6*3 zdKnbnbj~B``_|R`mj3CE4P#R4%cEh|hrAq}AH9?=O@B^D8Z}+zZYPhWmuTsh^trT+ zYYs~5q(#%->8t7U_L|^c51*%}wIN?RKkc3_NQP+db76gKdM2yUk#>t@Use?T=dLRoweXhMj9OG5`qSXn*$SgiqAGD` z4BigcoN#_DddoSk<*kL?2_=p?M4R;QU|K3J7|%>krZdb4#`^ZMvrgt;4WjYz{4{D3 zol0M1>o~f|{a>13^!ZU|??2Fw>{ee6;F|+jYF64l>K*Nc-FFx~KCK-!j!LEF(`wQ3 zbYU{cYiqr06BV(ha9DaN&v`%X8BLGc@}`HQ+oJr|K;0IdNf*(Y8de?_j7mn&M3+X- zrFo-EqPA%ZXP%v|OU9{g^ho7QFnQcnWlX+%GtGkT!1V3(1go}Tz0G=Yb^4?|s`+i= zVnxs3bOR}rTxRxm0J2yx|KI< zPIvIqqiMeAcYT`-t3&$na(Z!eT5m_=$3(q7W_|UmexGt&a%0!)tl#IBVeCH5hTbb5@J0IbR%IADt&^ z)o`Y+^k{GNlh^82=+%w(r>lKmf=c9Y-m9W_({0|}nil72Q_^PX)6P*SS`6{*(TC|> ze6xSrj@>U7vGcmFRZh|V=&E?P{=XRQj*7-P;(5^^IBtjG5q@%l9cHEXr%yohkMww2 zQB?Vd((J>-FI;Uc|JXo1UUcuIY+5;w{XR-BPwT;8S^7y@DrzHpFW~A^)7j|*=^a{; zC;i$e;&NJiy{%JO{yzFqn|3_`&+*oKb`z}{YTs>9W!CBx^^aOc6?sW%YH+Q0+vwAI zMldCeA3H%ew{3Sqv>dF8M}J_zK5HxIS#vm6>$6~z$N6ry&%O9Mm73iVJraG1-96c2 zmfuiZ4f*p}uBtOMif*L%5Aye^eDXa}qaYQ$!nQWlr*hN+{_U`-jO!QXH#bJN@S}oO zz+OyU+Ol3nI?~7We#f_pw0sMdEsX~atoy9$^#a#3jmp=nV)P#N$Psnm->;=X6Gy{v zkoyo#r{lNVa+Lk1x}Vbm{yz)T*5G{Iv*OVjgX2=vhqBq{xfI&9*(&g}5M6BS{rPas zCfC`Xo=TI93f+M54>`+#Xs$C4j6SBaW!Y<#JzMe?Ye=KzbmdAucDtT9R%&H5OmbhI&`*Qkr7n_}=XsvPX zKN@p}?;YlCYq7a1gxZK3=i|^`Ubvr~PO@+==$4QVR%fGg;%pvVZQ%M%wX*=nOlFNE~zjoLc1zyGEmcs)Y5SK#SZc#fAp zekwM7;am&6|B+AZ)%)%uOFs;(0+BiC3nIajBFAc4QB7Fu_&e*WS3V{A~)F+FVW#Aohxjav9|30{OOtFohR z#g7~K!+9RLX#I86ql(WRou{VHf6A~9@rk@JtO~bM_DX5(o$i^|Qf4uXDlA}+?XI_& zjmFsWER}oH)=~W6dkinn&#t5I3u(ZC^cOZeqs1p!t&p=Fg6wYgTWx#b)!)$kk_8?W zU*B@R*G0IA={J~BIQm(v*pmLvPV?C8O?FArlKii|-z7ZGOMB^OP~DQOdu7HJE#WdS z8YBO@6E-#INjLBAv2~0X)62Go*w)$OZuqpcM{U^F)UMnR`Ny@k^UhhgwoKGd^k)@s z8&2Qe|I3cIKg!tvQ`OUp{b{N1ftNfiHjP z&$5ga-{eax^lBx2{~abjWuA-l{V9rB21hR8O$YFOx@*6|2A^ro`;Pe4InJe*>-8R2 z-i1BGw0H)7uEL{_ynBTvcaSl3(()-V-0hgdJoiFqRTTg0c)dcuN6MXMdfbakFS(Q8 zW7<7|=C$*DmfyC5^cB2$vv^iWgegU(hdOGQxILB6)}+Z7W;~#To))F)wP^StURO=) z&eMa7@F+i4?S#;05Sol5gGHGCi8f>4wIZWKi;X%5`fSc7(~Ix%rXbtYas5Hg*pw%g zb*57ED8z_D*z^;7>|lfa{;tHFO|<+EtcmH$CMb?_w#|IvJXU_*dCGYGAJ$A48D@&P zPx8f4vdj;n1^!Ot=k+nS1Wl`!VN!|cUmEj~Hm-*EY1Y}o25+#&61~|Ck+WE8r0ww! zYt-N`m3dPQ%r8zaj%(SctToTx2jyx#wEYSgZkMlYr-E1FVMQD-;gQ3&s_11IBdH~< zxtLev(fiBj=M&<2d;H8vOZHNs_w?%>s&NoI^Sjm_*I3TFi8j>b%YF3hS-Np+Mva={ z|D*c2h`q+M>-lUHG${uyIpiG2_|t_HDz+|vu4})BeNTvC56FpbPP?$m0E#rg72lUl z?Zng{^tYJSUrjOk$c%1anUZ4q*VOZve60ZV>OrB37N26rrOx}R`&({grz5ntpkC&O z&oK&@B|q!TE;aSDta$%B5C2(Tj^cMLO8rF*o}>H^%F$nfwu%;|Kg24pQk~^|Zk0A~ zfb%MSi88k9~mFpgdlE%y7PNgk?WMlSuSN_!X{(Zfx$zAv!mH7xH- zGp|*rDj~D{*C<%^Fu71>e4)Ilqdc=OY~P^;d!e$*vBgv-a)`fSBvc+R4q?F(x;QLj zm(?O&(4*5>wN8Zk(lK)=;)gK#6OIMsOgo(IJ=Qr6<2*bhp{x;a*v``X;8|1uJM+qO z)VPA{l*5mOI6EF-~3y%LA^oPEWtL&pA2X zzpi;KEkYSncG#>p-|>(w84G3cz?w#K{dmA#upL65he33^_jj{EFFNw5@yRBBInS7G zE!-cY>$NcJj!fpa-$-yao4n7WeyJ3#VS$gZ`EmSf!D0{ck$b$>rc?_t?E`8!8*&ri zJJ;v;vFKj$tD%hlbn=gTvVM?kW0zXS37w2suP}D4C*vy0MrDi+E;qN_m33w~%Raq3 zp?8Nwuao+EL~gRf=lQ;!?)f1V|H3tbN~}wBYeO}77WKH84_v1ogFQmjZ-z~ivGNi9 z{#b6h5vL}JO?{%VIM$P9UdIjxTw$k7-~bdOUcZf{7sGJ}^?XSrdYgj2&hGPA=3`ta zNmH(H?rSqy)b)^^j-B(2mVeB!b!!x>OB9N;$KSzqusC0h%}e6IQC2HVpLg=>-(2-P z5vPfar@MAkgk}wlKFS+|f~~-zbH;F|_-s9UJV596L3bdeM#}s9!z`z2YreE{+SvGY zI^3V8m0Mu4(~lb0MlivLby?69zDgONc&jtMZE$0};`=`k+ zZsRX2wR4`&Pk9VADhXrF6w|}_vxAJMr|kV+nQRydT|iq$(!jn{=$KZlV72cYA5J*ll+o$r=xJvhDE{?w_K>rN zk;JVy+8S2HoU4L%jKbwMtiPKItn^q0!9D!*CRt0HOf~Oq=l&A^U)I8bB2qIRRoxjn z$QGJvOW{nea7JeI8$J16Oe-z!m9x(_OqgM0-A#3C052VAEZdVh55tHxFk6L}`5Yho z@iSX{r>#`wN~LL%*9c-N#Aaa71lZ1T#Ot<9hUlmK<3%IvKE@dj+ZHT0$={_o{XM$} zO*`VuKV$lrjvOkx_*q61?t7fa)vEFBd$4*8gzn?zbByB;iZ`=m6vO!DeOT9%1~;KS zJ@ofI_0&F4?yASb;T7r#mFQ*f_Vc6jV!&m36%sTL2#(feREN@mnuX4DO%o?xBe1*xq0 zDp8}U=LL|iBcHxni^_U!s6{Qb>~`0@)#n1@_j}IK&#}9)`;$yOUO|QTi(Y#u-}|hz zo^|d~rM%MUxD{Nd;P43de7;uoqdzOW#0D=KV-K?RQE|4m{qEp30r4+IwsvAf6N)v^ zp7VIdY8JRiA8(F6i4M3Q@-eTW>NO5K=X?K^s5FJHjLUHLH5xmLstn~1L36`B8ivCC zn~b_nqm~okIE9Vg!tzCwcb#uT4Q+tWOV!!diJar@Gt|BzD;&UfO`y}jGerArvWWWd z&MwMqqf~p~cmgMmdcRpOHsJ2>8Sh`q^M1spN1@)?-&ydQfn^WFXCn1n%sSf~o!hwc zmyGAn#N4+WwI9A?c>eWtx-EQK<8uqIkJH|eS3Sc9L%c4Qudjf57`JVL{{|er1J(^_ zxg`i@%Bp3?&?6nO0J>Z8;#ZOJv=+YWY!mVNunI_PIYUV!j>9-p2^Sj4iRbatP#J#) zk6#e~mttVRWU$X2@S+CR^~L=0;?Yc~kM(Q^s}}g#g^u0?|F*Qhmve{8P+)2Yd(<&D z2&}4UpTMdRgF`)}K7H=bVvTKoK&~>477gb!57OJJ_9?_mj>-dy)9s?(uhajZWx`*; zJdCS$;c_wY^udO&*F;);WU)pTgw#Fm1GZNDjcd zIedPSclU{l?LBH@?p3M}UFGg~Q=dzy{dw5kLXOkhdXy#}Ma{;hNnufTq*f&|l7D67 zAIazYsff&F(V&FOG2ji>3gg|7Asl4qg5ux1dio(QE}@Q}z$SUYFbQjxF=e z3H=wHRdK%{{zGN{2U*%;->>!kGEwgpb(%M+&=_|6E&4agihl9U$Bv(sv0yt|J%Wl4 zpt%+0pdkZt=TyGm9mXU0OdmPLAeN~@L-$aS)i8OFuTJExE8(`Al4r@fPiuX4TJb9+ z7hv-w@o0iPbpqxt5TE{sk?Wn|lxVh9pTDNZlPS(vTwTUO=cw;s8DImsM`3q0IFS67 zWP@B!KGIoE(vC8`K((f@$DP<;QPx;Vjp?T<0D(d7Q`G=9uQ*jnbC4 zZNFU=sk?Kmz>16TI!h(3C1&0sBWWk==%=@{;WQjOZ-T~^`aT49^KpEOYVdDS>9}Eh zb=*4c9p4u}65km&jw{4vAfNayc{3cIw~w~Mf~nhbj+wRwbUH1}*`{^4YjTTQdI z5%Fl>J)L=-;hM`S$ZPbdc=|_@!x<_=x}CaRlMIs2u~I+hd5G=Cuwl@cpICpVcD)Ym z;Udcv-m}G)?R4xQymMpJ-FWmWJQiixxdL9};4&E-51AQi89x=j;jRU*#80~C#4Yh% zao6}Nch5g&R%afaR-m>a_o=8y&E8Yv(*73;+bYf zKZ)1IAGpiJwD_TTP&_+c7SD;_aZiA$@!+^S#BX7Xc2*jdjtk&=g}A)jc|W_%pyl_8 zz~$s3VdPj}ju2+APO^7#dViNZr7(64X?UkT@$hDB2la-zkb0i+&KCgtU|xUwQ!f%NZWd$J3Ye@VVdw&L>-MvMQ_ zhNAGV=24f%6lH^UqGNuxsR!@t=}}~BsWh)S&(idHL`L^RZTy%>^0^rB340!up@xcB z32g~uw;JM4L+x$owGBLPk!dy%?HlskL>9Hzs*Ig@QvfPmM79q_yj9q^flkb&F7Hu- zW#Zd(S{x$hEOwa89>ZmI+1R3e+|(>}={S#BRKJdl!#wm+_a~?rSM|6sK1gX+QswVe z!QQ0V_sCDeeAi%>ZV&Z>Mx+~I6|$`!STr23M~R6Wj0h^LSd`SxEEc&aGp;P<{ZF!v zote78F5J%s@zlP*dTzk+n(1!$(EA;#+h{;e|Ba#w3pC>Elku^cS^RotPU}&Hmbl&< zqi>UO^!NR<;^BulIt{W<;N>unf$VoFn_cJqGg>ywx0B@;uZzSJX#6ltZ^$}Nh|06Y z#pyimId&f_2HcP7GuY)9xyr@Rzf8TMt7pi?iW_xYC1b0{bN|KtoyOv!57|YnB5ZTN zIQgY$_KmsfFChIFjX8n)N25GqW>ZXm*4>1b#J|M{<6ZH38uKC_XzRPnS);JEGW)Fd zI1-)qtASnen6={PCKmlvb)$z-&qo+Kk?uVJk&p{?%#2qaf)Ud!e*Ao>UQeJJhgslPinB_@+K72?P?jg@Z>U&rpge2Xa5{~dW#9R*33JaUSuSYo z499%#{W^PZ=Kt$(_fI3s+Wh}cK01Kb_2ZR8#PrvV{03u6BbbE>$r;f*{C|WA7&wk* zj+AqB!|FWh3}O7ZRBIp7_R2D!Dm1Bwd9$l2&@~?IY0xZj;u9_0#9|-w)sQRxYfd}V z>z_2DT;zK7M6bHkw~9xYiMp3HRwYh&MFSo|-i3G?ZsM>NyC{$=GIsZ2D=>HnjRW7JUg0k7jfzR7Q`;6gR*+ z%%Fw&u~o_l7Rgwt3>DdZ>*)~EpV;_?Q5)!+4LaH^*<}aeo|i6Ni2MT zB0VqCj=}s#WEovx`>LKl%Qlag1GzzrE3NW+*neSq7{8`q_C^-lOIugj^SO+wY)i{# z#$2^khRSLAK)ij%Sm1w{JWzZaBby8BN?yRi`?RN-{@%($U0J)N{)D;ff$+FY)G1-4 zR?3)aBfnaP9baS8cGvuxeS`NcHl|qa`X|^tRNxPI{%eeO0^fs8x7y=f{CZLje1P-wDxf9)UZ<*ncdfLvBBUzs}Y6EtGGEjAx^<(`QmCxZ{t0suslSKvRao*E%6vl$Ri#GOjbc5RH8OH z*BXkt*mXYUtrbPhP#f<_SL=#2p^_i6gTfFFc|l=kDlBgaD3yf!2Jjfp3`Wdr0_vZd&kIRgo^*DOxrtYQv+Nd#dEu}NJ| z_Fb(_JM?;^?0$$W=}DQwdNFpBKE8vA@AH<$87=tAyWQAQ8g@6adU@Ej!tW2Te7+3j zSoC+aRg4`g8jKeGM~nR3S>zhb=#D{m@q^ZS(ozQAoZ1wJd3LPd&z9R<>m6791WJ4P z>$k2xQVw>H5nxXk&(_z|Mt%pi^=rNR7LGgV$Q~^Vt6M^C>j-vy=-BM=sfVGDLiAb4 z&Jve~dyItV{roa;rAVgZHeJODuB!FTGo(4Oya51CML8Fy!D(U^`2$`LnEl zv(f*jY%rW359S9?xSZdF_*q*&ic!Rr#uxETDnI*24^#O~4g3Cw z)7fyk6>LJBxKakUnKv#H2hPGJ)JYGr$!|Qlj)?xO$NR<~6a1a;b*0hBTNvGke}>f* zZJnVWe=VVJ^)vbrdQri(PeX1FPv0$ztVYp7{qis$+QlEk%9$#j7s4XU)r1v1A$Fu0 z7A(Z$P+7B9)+pgxId64;>l+(Ph2>jf>~x;j8_H!=C(fx&T*wMtSfwFfxR4!gz_BnY z4prj~(EdmLt~N}sp>G|uCam`Mzc*mj!|1UeJYSVb?viN@9 zf7qU3J;`%o)H6J`0u1-?{SDf_ll}ho2>JIv`c{N4oyp{uD>KS6L{|TZ?Bpd%Gf%wv zzic4n3G4LcO~+TDNY{yXAulaPw~pieK0XxIP*vw=-7-9`>AW!yZJn`Q|BM=TWr_Kj zI^1HpKv?-TROa@9$5fd@7d`Q7OkNXKB2>><<*akBz{j7R{}b&Cx$IXI;aw`x4PUQO z+sSJT(mFFDt73F}iAqBO`BXM&A5H#DvXXHF}kj# zR(v8klN?J5vCA_&`ZqQR_TJ5}^HPawRPZsQ&&O!x6iT^NFTRLAGpagnO=!V5HiFIX z?*-}#|Ehj1!oYNhc{P?@dhngHMxV>+-uE>03>5dW$_YBN z&zAk_Hlc>m3J-1*=Ndz(mu#w^Phpi%L*I1p-DA!-Tjm!k!=b|8A0MulW!>y=TjRxM z+TL8GxkMBWIbMjwG1UuI!aREUe>3p=smK;eHj661=Wm#$UdiuYf=3Vd7K2$K_?0m- zs_nHPB=cKOcmShMC%J6DhE9&bm$xbRSz0v3c&VvsL=C!gt+7-iPiiT$=-nN6TAZH)48KruC5i^f12bL_J&ZY6+YeUOd~RRXc!N4bk_uFJn@^UiQih-s6&PcxSO{fx;^{bCGsmC|E%4cpoTfs)f}bu>LD;JTP#&Xy7F)M^`gN=7Koym- zBe4Dm>qhh8TlBvIM4RyUJo-|I_3npmH~kHmH>S3wGa6b_mRJ>bom}-E`}AU$F5+QF z7Pv?(yOITNWY1=fyxkdsZrmb*hqK%a(ZMi%l*R>=AA;*WtoxCA(`S@^f!Tu9+8s3T zc4usBbQW~GER1W3E+O+fAvR|j&4ii3@uS-k(8o+a!P`YlZt+k1HRtD0nJ@}c_vw`y=-Cf_6*lJ8mSWKzPJhQPe3 z`078^$#%!l&JBE<@pwm3E$VOlFciBVqkfYiKS}$S>P47Q3NdpA#CKXHalr3~&Qh9v zY6joo_!3pLHJP;&yX>*r-0Ch&KOUXSWIyZ8jU0~>wVr=emp7~Y{OBwPS?*HnEPmx3 z-`ToDMduYt65`5rl)RbuVV(C}Rl0Ybe=W;@t9CcnrxhaUTy^`q*y0hkYAu63DF0a_ zCwf)R^fCRKBX&)bm40OGFjh_M8h+kT&AkdWz5r*7;czZ>%QG}2kDC3-t zvdP~+{TJeF_HJoJSyYrfr22N)mb@y$@Ay^hTgk#?S+Xiwfbo^+^-ZjFCA|riw`MU8&VWKTJbMHY!5J7gO9j1B)|h2tXOKkD6=idnO;`3L;Y zqxx0ZFT4U)+2V7mGM_{56uj26O;|IqN!&eP?qd(M_xtY`C%oq66MsiJ)QPh@>ioED zTqO2eDZE_CLXkRiN0s|taU+o;SNsXL=)Zk{p{9WVkrFi@?wI0OJNBCy4t+Ob~1Sm|DgKWgP zi=FG7J;R!n`)EQx+A!GnZRF&o*kpUMIC&%)fcbAH)01gl=b0-RnsmeWc1cgXukDVo zH+nosZ$8EKedbSoPj-s02faI%R)(&F(242n5bd$Yx-eEyz#U#|LlrON#M{tNIxGa(OT<~%B?P_~P&Q!_gb#gK94 zx8GIqwHbu3HXpKq=6`L5a0AvpPY3S7+opJ4i~?MO>E%Vduzs>U)h^*(Qz}teo}Cz1 zgq3YsqSq=Jz--yiF7wz;Xi_mfI3q?S^4_p=dmfE`MXU>JL8s!?hhpB_wuU+Wt#XI& zurBtf9^#kD-Wv|M~7D0O@vs>i@A^N-u zhjqC4BhO8|i`dy$hR6WVdNFqjUQI0Kr*m+W-In literal 0 HcmV?d00001 diff --git a/src/bits.h b/src/bits.h new file mode 100644 index 0000000..9c4b824 --- /dev/null +++ b/src/bits.h @@ -0,0 +1,108 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_LIB_CORE_BITS_H_ +#define TENSORFLOW_CORE_LIB_CORE_BITS_H_ + +#include "types.h" + +namespace tensorflow { + +// Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0. +int Log2Floor(uint32 n); +int Log2Floor64(uint64 n); + +// Return ceiling(log2(n)) for positive integer n. Returns -1 iff n == 0. +int Log2Ceiling(uint32 n); +int Log2Ceiling64(uint64 n); + +// ------------------------------------------------------------------------ +// Implementation details follow +// ------------------------------------------------------------------------ + +#if defined(__GNUC__) + +// Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0. +inline int Log2Floor(uint32 n) { return n == 0 ? -1 : 31 ^ __builtin_clz(n); } + +// Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0. +inline int Log2Floor64(uint64 n) { + return n == 0 ? -1 : 63 ^ __builtin_clzll(n); +} + +#else + +// Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0. +inline int Log2Floor(uint32 n) { + if (n == 0) return -1; + int log = 0; + uint32 value = n; + for (int i = 4; i >= 0; --i) { + int shift = (1 << i); + uint32 x = value >> shift; + if (x != 0) { + value = x; + log += shift; + } + } + assert(value == 1); + return log; +} + +// Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0. +// Log2Floor64() is defined in terms of Log2Floor32() +inline int Log2Floor64(uint64 n) { + const uint32 topbits = static_cast(n >> 32); + if (topbits == 0) { + // Top bits are zero, so scan in bottom bits + return Log2Floor(static_cast(n)); + } else { + return 32 + Log2Floor(topbits); + } +} + +#endif + +inline int Log2Ceiling(uint32 n) { + int floor = Log2Floor(n); + if (n == (n & ~(n - 1))) // zero or a power of two + return floor; + else + return floor + 1; +} + +inline int Log2Ceiling64(uint64 n) { + int floor = Log2Floor64(n); + if (n == (n & ~(n - 1))) // zero or a power of two + return floor; + else + return floor + 1; +} + +inline uint32 NextPowerOfTwo(uint32 value) { + int exponent = Log2Ceiling(value); + // DCHECK_LT(exponent, std::numeric_limits::digits); + return 1 << exponent; +} + +inline uint64 NextPowerOfTwo64(uint64 value) { + int exponent = Log2Ceiling(value); + // DCHECK_LT(exponent, std::numeric_limits::digits); + return 1LL << exponent; +} + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_LIB_CORE_BITS_H_ diff --git a/src/fftsg.c b/src/fftsg.c new file mode 100644 index 0000000..40e45ae --- /dev/null +++ b/src/fftsg.c @@ -0,0 +1,3009 @@ +/* -------- child routines -------- */ + + +#ifdef USE_CDFT_PTHREADS +#define USE_CDFT_THREADS +#ifndef CDFT_THREADS_BEGIN_N +#define CDFT_THREADS_BEGIN_N 8192 +#endif +#ifndef CDFT_4THREADS_BEGIN_N +#define CDFT_4THREADS_BEGIN_N 65536 +#endif +#include +#include +#include +#define cdft_thread_t pthread_t +#define cdft_thread_create(thp,func,argp) { \ + if (pthread_create(thp, NULL, func, (void *) argp) != 0) { \ + fprintf(stderr, "cdft thread error\n"); \ + exit(1); \ + } \ +} +#define cdft_thread_wait(th) { \ + if (pthread_join(th, NULL) != 0) { \ + fprintf(stderr, "cdft thread error\n"); \ + exit(1); \ + } \ +} +#endif /* USE_CDFT_PTHREADS */ + + +#ifdef USE_CDFT_WINTHREADS +#define USE_CDFT_THREADS +#ifndef CDFT_THREADS_BEGIN_N +#define CDFT_THREADS_BEGIN_N 32768 +#endif +#ifndef CDFT_4THREADS_BEGIN_N +#define CDFT_4THREADS_BEGIN_N 524288 +#endif +#include +#include +#include +#define cdft_thread_t HANDLE +#define cdft_thread_create(thp,func,argp) { \ + DWORD thid; \ + *(thp) = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) func, (LPVOID) argp, 0, &thid); \ + if (*(thp) == 0) { \ + fprintf(stderr, "cdft thread error\n"); \ + exit(1); \ + } \ +} +#define cdft_thread_wait(th) { \ + WaitForSingleObject(th, INFINITE); \ + CloseHandle(th); \ +} +#endif /* USE_CDFT_WINTHREADS */ + + +void cftfsub(int n, double *a, int *ip, int nw, double *w) +{ + void bitrv2(int n, int *ip, double *a); + void bitrv216(double *a); + void bitrv208(double *a); + void cftf1st(int n, double *a, double *w); + void cftrec4(int n, double *a, int nw, double *w); + void cftleaf(int n, int isplt, double *a, int nw, double *w); + void cftfx41(int n, double *a, int nw, double *w); + void cftf161(double *a, double *w); + void cftf081(double *a, double *w); + void cftf040(double *a); + void cftx020(double *a); +#ifdef USE_CDFT_THREADS + void cftrec4_th(int n, double *a, int nw, double *w); +#endif /* USE_CDFT_THREADS */ + + if (n > 8) { + if (n > 32) { + cftf1st(n, a, &w[nw - (n >> 2)]); +#ifdef USE_CDFT_THREADS + if (n > CDFT_THREADS_BEGIN_N) { + cftrec4_th(n, a, nw, w); + } else +#endif /* USE_CDFT_THREADS */ + if (n > 512) { + cftrec4(n, a, nw, w); + } else if (n > 128) { + cftleaf(n, 1, a, nw, w); + } else { + cftfx41(n, a, nw, w); + } + bitrv2(n, ip, a); + } else if (n == 32) { + cftf161(a, &w[nw - 8]); + bitrv216(a); + } else { + cftf081(a, w); + bitrv208(a); + } + } else if (n == 8) { + cftf040(a); + } else if (n == 4) { + cftx020(a); + } +} + + +void cftbsub(int n, double *a, int *ip, int nw, double *w) +{ + void bitrv2conj(int n, int *ip, double *a); + void bitrv216neg(double *a); + void bitrv208neg(double *a); + void cftb1st(int n, double *a, double *w); + void cftrec4(int n, double *a, int nw, double *w); + void cftleaf(int n, int isplt, double *a, int nw, double *w); + void cftfx41(int n, double *a, int nw, double *w); + void cftf161(double *a, double *w); + void cftf081(double *a, double *w); + void cftb040(double *a); + void cftx020(double *a); +#ifdef USE_CDFT_THREADS + void cftrec4_th(int n, double *a, int nw, double *w); +#endif /* USE_CDFT_THREADS */ + + if (n > 8) { + if (n > 32) { + cftb1st(n, a, &w[nw - (n >> 2)]); +#ifdef USE_CDFT_THREADS + if (n > CDFT_THREADS_BEGIN_N) { + cftrec4_th(n, a, nw, w); + } else +#endif /* USE_CDFT_THREADS */ + if (n > 512) { + cftrec4(n, a, nw, w); + } else if (n > 128) { + cftleaf(n, 1, a, nw, w); + } else { + cftfx41(n, a, nw, w); + } + bitrv2conj(n, ip, a); + } else if (n == 32) { + cftf161(a, &w[nw - 8]); + bitrv216neg(a); + } else { + cftf081(a, w); + bitrv208neg(a); + } + } else if (n == 8) { + cftb040(a); + } else if (n == 4) { + cftx020(a); + } +} + + +void bitrv2(int n, int *ip, double *a) +{ + int j, j1, k, k1, l, m, nh, nm; + double xr, xi, yr, yi; + + m = 1; + for (l = n >> 2; l > 8; l >>= 2) { + m <<= 1; + } + nh = n >> 1; + nm = 4 * m; + if (l == 8) { + for (k = 0; k < m; k++) { + for (j = 0; j < k; j++) { + j1 = 4 * j + 2 * ip[m + k]; + k1 = 4 * k + 2 * ip[m + j]; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 += 2 * nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 -= nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 += 2 * nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nh; + k1 += 2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nm; + k1 -= 2 * nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nm; + k1 += nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nm; + k1 -= 2 * nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += 2; + k1 += nh; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 += 2 * nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 -= nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 += 2 * nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nh; + k1 -= 2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nm; + k1 -= 2 * nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nm; + k1 += nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nm; + k1 -= 2 * nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + } + k1 = 4 * k + 2 * ip[m + k]; + j1 = k1 + 2; + k1 += nh; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 += 2 * nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 -= nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= 2; + k1 -= nh; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nh + 2; + k1 += nh + 2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nh - nm; + k1 += 2 * nm - 2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + } + } else { + for (k = 0; k < m; k++) { + for (j = 0; j < k; j++) { + j1 = 4 * j + ip[m + k]; + k1 = 4 * k + ip[m + j]; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 += nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nh; + k1 += 2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nm; + k1 -= nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += 2; + k1 += nh; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 += nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nh; + k1 -= 2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nm; + k1 -= nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + } + k1 = 4 * k + ip[m + k]; + j1 = k1 + 2; + k1 += nh; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 += nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + } + } +} + + +void bitrv2conj(int n, int *ip, double *a) +{ + int j, j1, k, k1, l, m, nh, nm; + double xr, xi, yr, yi; + + m = 1; + for (l = n >> 2; l > 8; l >>= 2) { + m <<= 1; + } + nh = n >> 1; + nm = 4 * m; + if (l == 8) { + for (k = 0; k < m; k++) { + for (j = 0; j < k; j++) { + j1 = 4 * j + 2 * ip[m + k]; + k1 = 4 * k + 2 * ip[m + j]; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 += 2 * nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 -= nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 += 2 * nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nh; + k1 += 2; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nm; + k1 -= 2 * nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nm; + k1 += nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nm; + k1 -= 2 * nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += 2; + k1 += nh; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 += 2 * nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 -= nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 += 2 * nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nh; + k1 -= 2; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nm; + k1 -= 2 * nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nm; + k1 += nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nm; + k1 -= 2 * nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + } + k1 = 4 * k + 2 * ip[m + k]; + j1 = k1 + 2; + k1 += nh; + a[j1 - 1] = -a[j1 - 1]; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + a[k1 + 3] = -a[k1 + 3]; + j1 += nm; + k1 += 2 * nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 -= nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= 2; + k1 -= nh; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nh + 2; + k1 += nh + 2; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nh - nm; + k1 += 2 * nm - 2; + a[j1 - 1] = -a[j1 - 1]; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + a[k1 + 3] = -a[k1 + 3]; + } + } else { + for (k = 0; k < m; k++) { + for (j = 0; j < k; j++) { + j1 = 4 * j + ip[m + k]; + k1 = 4 * k + ip[m + j]; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 += nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nh; + k1 += 2; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nm; + k1 -= nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += 2; + k1 += nh; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 += nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nh; + k1 -= 2; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nm; + k1 -= nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + } + k1 = 4 * k + ip[m + k]; + j1 = k1 + 2; + k1 += nh; + a[j1 - 1] = -a[j1 - 1]; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + a[k1 + 3] = -a[k1 + 3]; + j1 += nm; + k1 += nm; + a[j1 - 1] = -a[j1 - 1]; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + a[k1 + 3] = -a[k1 + 3]; + } + } +} + + +void bitrv216(double *a) +{ + double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, + x5r, x5i, x7r, x7i, x8r, x8i, x10r, x10i, + x11r, x11i, x12r, x12i, x13r, x13i, x14r, x14i; + + x1r = a[2]; + x1i = a[3]; + x2r = a[4]; + x2i = a[5]; + x3r = a[6]; + x3i = a[7]; + x4r = a[8]; + x4i = a[9]; + x5r = a[10]; + x5i = a[11]; + x7r = a[14]; + x7i = a[15]; + x8r = a[16]; + x8i = a[17]; + x10r = a[20]; + x10i = a[21]; + x11r = a[22]; + x11i = a[23]; + x12r = a[24]; + x12i = a[25]; + x13r = a[26]; + x13i = a[27]; + x14r = a[28]; + x14i = a[29]; + a[2] = x8r; + a[3] = x8i; + a[4] = x4r; + a[5] = x4i; + a[6] = x12r; + a[7] = x12i; + a[8] = x2r; + a[9] = x2i; + a[10] = x10r; + a[11] = x10i; + a[14] = x14r; + a[15] = x14i; + a[16] = x1r; + a[17] = x1i; + a[20] = x5r; + a[21] = x5i; + a[22] = x13r; + a[23] = x13i; + a[24] = x3r; + a[25] = x3i; + a[26] = x11r; + a[27] = x11i; + a[28] = x7r; + a[29] = x7i; +} + + +void bitrv216neg(double *a) +{ + double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, + x5r, x5i, x6r, x6i, x7r, x7i, x8r, x8i, + x9r, x9i, x10r, x10i, x11r, x11i, x12r, x12i, + x13r, x13i, x14r, x14i, x15r, x15i; + + x1r = a[2]; + x1i = a[3]; + x2r = a[4]; + x2i = a[5]; + x3r = a[6]; + x3i = a[7]; + x4r = a[8]; + x4i = a[9]; + x5r = a[10]; + x5i = a[11]; + x6r = a[12]; + x6i = a[13]; + x7r = a[14]; + x7i = a[15]; + x8r = a[16]; + x8i = a[17]; + x9r = a[18]; + x9i = a[19]; + x10r = a[20]; + x10i = a[21]; + x11r = a[22]; + x11i = a[23]; + x12r = a[24]; + x12i = a[25]; + x13r = a[26]; + x13i = a[27]; + x14r = a[28]; + x14i = a[29]; + x15r = a[30]; + x15i = a[31]; + a[2] = x15r; + a[3] = x15i; + a[4] = x7r; + a[5] = x7i; + a[6] = x11r; + a[7] = x11i; + a[8] = x3r; + a[9] = x3i; + a[10] = x13r; + a[11] = x13i; + a[12] = x5r; + a[13] = x5i; + a[14] = x9r; + a[15] = x9i; + a[16] = x1r; + a[17] = x1i; + a[18] = x14r; + a[19] = x14i; + a[20] = x6r; + a[21] = x6i; + a[22] = x10r; + a[23] = x10i; + a[24] = x2r; + a[25] = x2i; + a[26] = x12r; + a[27] = x12i; + a[28] = x4r; + a[29] = x4i; + a[30] = x8r; + a[31] = x8i; +} + + +void bitrv208(double *a) +{ + double x1r, x1i, x3r, x3i, x4r, x4i, x6r, x6i; + + x1r = a[2]; + x1i = a[3]; + x3r = a[6]; + x3i = a[7]; + x4r = a[8]; + x4i = a[9]; + x6r = a[12]; + x6i = a[13]; + a[2] = x4r; + a[3] = x4i; + a[6] = x6r; + a[7] = x6i; + a[8] = x1r; + a[9] = x1i; + a[12] = x3r; + a[13] = x3i; +} + + +void bitrv208neg(double *a) +{ + double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, + x5r, x5i, x6r, x6i, x7r, x7i; + + x1r = a[2]; + x1i = a[3]; + x2r = a[4]; + x2i = a[5]; + x3r = a[6]; + x3i = a[7]; + x4r = a[8]; + x4i = a[9]; + x5r = a[10]; + x5i = a[11]; + x6r = a[12]; + x6i = a[13]; + x7r = a[14]; + x7i = a[15]; + a[2] = x7r; + a[3] = x7i; + a[4] = x3r; + a[5] = x3i; + a[6] = x5r; + a[7] = x5i; + a[8] = x1r; + a[9] = x1i; + a[10] = x6r; + a[11] = x6i; + a[12] = x2r; + a[13] = x2i; + a[14] = x4r; + a[15] = x4i; +} + + +void cftf1st(int n, double *a, double *w) +{ + int j, j0, j1, j2, j3, k, m, mh; + double wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i, + wd1r, wd1i, wd3r, wd3i; + double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, + y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i; + + mh = n >> 3; + m = 2 * mh; + j1 = m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[0] + a[j2]; + x0i = a[1] + a[j2 + 1]; + x1r = a[0] - a[j2]; + x1i = a[1] - a[j2 + 1]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + a[0] = x0r + x2r; + a[1] = x0i + x2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i - x2i; + a[j2] = x1r - x3i; + a[j2 + 1] = x1i + x3r; + a[j3] = x1r + x3i; + a[j3 + 1] = x1i - x3r; + wn4r = w[1]; + csc1 = w[2]; + csc3 = w[3]; + wd1r = 1; + wd1i = 0; + wd3r = 1; + wd3i = 0; + k = 0; + for (j = 2; j < mh - 2; j += 4) { + k += 4; + wk1r = csc1 * (wd1r + w[k]); + wk1i = csc1 * (wd1i + w[k + 1]); + wk3r = csc3 * (wd3r + w[k + 2]); + wk3i = csc3 * (wd3i + w[k + 3]); + wd1r = w[k]; + wd1i = w[k + 1]; + wd3r = w[k + 2]; + wd3i = w[k + 3]; + j1 = j + m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[j] + a[j2]; + x0i = a[j + 1] + a[j2 + 1]; + x1r = a[j] - a[j2]; + x1i = a[j + 1] - a[j2 + 1]; + y0r = a[j + 2] + a[j2 + 2]; + y0i = a[j + 3] + a[j2 + 3]; + y1r = a[j + 2] - a[j2 + 2]; + y1i = a[j + 3] - a[j2 + 3]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + y2r = a[j1 + 2] + a[j3 + 2]; + y2i = a[j1 + 3] + a[j3 + 3]; + y3r = a[j1 + 2] - a[j3 + 2]; + y3i = a[j1 + 3] - a[j3 + 3]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + a[j + 2] = y0r + y2r; + a[j + 3] = y0i + y2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i - x2i; + a[j1 + 2] = y0r - y2r; + a[j1 + 3] = y0i - y2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j2] = wk1r * x0r - wk1i * x0i; + a[j2 + 1] = wk1r * x0i + wk1i * x0r; + x0r = y1r - y3i; + x0i = y1i + y3r; + a[j2 + 2] = wd1r * x0r - wd1i * x0i; + a[j2 + 3] = wd1r * x0i + wd1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3] = wk3r * x0r + wk3i * x0i; + a[j3 + 1] = wk3r * x0i - wk3i * x0r; + x0r = y1r + y3i; + x0i = y1i - y3r; + a[j3 + 2] = wd3r * x0r + wd3i * x0i; + a[j3 + 3] = wd3r * x0i - wd3i * x0r; + j0 = m - j; + j1 = j0 + m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[j0] + a[j2]; + x0i = a[j0 + 1] + a[j2 + 1]; + x1r = a[j0] - a[j2]; + x1i = a[j0 + 1] - a[j2 + 1]; + y0r = a[j0 - 2] + a[j2 - 2]; + y0i = a[j0 - 1] + a[j2 - 1]; + y1r = a[j0 - 2] - a[j2 - 2]; + y1i = a[j0 - 1] - a[j2 - 1]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + y2r = a[j1 - 2] + a[j3 - 2]; + y2i = a[j1 - 1] + a[j3 - 1]; + y3r = a[j1 - 2] - a[j3 - 2]; + y3i = a[j1 - 1] - a[j3 - 1]; + a[j0] = x0r + x2r; + a[j0 + 1] = x0i + x2i; + a[j0 - 2] = y0r + y2r; + a[j0 - 1] = y0i + y2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i - x2i; + a[j1 - 2] = y0r - y2r; + a[j1 - 1] = y0i - y2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j2] = wk1i * x0r - wk1r * x0i; + a[j2 + 1] = wk1i * x0i + wk1r * x0r; + x0r = y1r - y3i; + x0i = y1i + y3r; + a[j2 - 2] = wd1i * x0r - wd1r * x0i; + a[j2 - 1] = wd1i * x0i + wd1r * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3] = wk3i * x0r + wk3r * x0i; + a[j3 + 1] = wk3i * x0i - wk3r * x0r; + x0r = y1r + y3i; + x0i = y1i - y3r; + a[j3 - 2] = wd3i * x0r + wd3r * x0i; + a[j3 - 1] = wd3i * x0i - wd3r * x0r; + } + wk1r = csc1 * (wd1r + wn4r); + wk1i = csc1 * (wd1i + wn4r); + wk3r = csc3 * (wd3r - wn4r); + wk3i = csc3 * (wd3i - wn4r); + j0 = mh; + j1 = j0 + m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[j0 - 2] + a[j2 - 2]; + x0i = a[j0 - 1] + a[j2 - 1]; + x1r = a[j0 - 2] - a[j2 - 2]; + x1i = a[j0 - 1] - a[j2 - 1]; + x2r = a[j1 - 2] + a[j3 - 2]; + x2i = a[j1 - 1] + a[j3 - 1]; + x3r = a[j1 - 2] - a[j3 - 2]; + x3i = a[j1 - 1] - a[j3 - 1]; + a[j0 - 2] = x0r + x2r; + a[j0 - 1] = x0i + x2i; + a[j1 - 2] = x0r - x2r; + a[j1 - 1] = x0i - x2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j2 - 2] = wk1r * x0r - wk1i * x0i; + a[j2 - 1] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3 - 2] = wk3r * x0r + wk3i * x0i; + a[j3 - 1] = wk3r * x0i - wk3i * x0r; + x0r = a[j0] + a[j2]; + x0i = a[j0 + 1] + a[j2 + 1]; + x1r = a[j0] - a[j2]; + x1i = a[j0 + 1] - a[j2 + 1]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + a[j0] = x0r + x2r; + a[j0 + 1] = x0i + x2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i - x2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j2] = wn4r * (x0r - x0i); + a[j2 + 1] = wn4r * (x0i + x0r); + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3] = -wn4r * (x0r + x0i); + a[j3 + 1] = -wn4r * (x0i - x0r); + x0r = a[j0 + 2] + a[j2 + 2]; + x0i = a[j0 + 3] + a[j2 + 3]; + x1r = a[j0 + 2] - a[j2 + 2]; + x1i = a[j0 + 3] - a[j2 + 3]; + x2r = a[j1 + 2] + a[j3 + 2]; + x2i = a[j1 + 3] + a[j3 + 3]; + x3r = a[j1 + 2] - a[j3 + 2]; + x3i = a[j1 + 3] - a[j3 + 3]; + a[j0 + 2] = x0r + x2r; + a[j0 + 3] = x0i + x2i; + a[j1 + 2] = x0r - x2r; + a[j1 + 3] = x0i - x2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j2 + 2] = wk1i * x0r - wk1r * x0i; + a[j2 + 3] = wk1i * x0i + wk1r * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3 + 2] = wk3i * x0r + wk3r * x0i; + a[j3 + 3] = wk3i * x0i - wk3r * x0r; +} + + +void cftb1st(int n, double *a, double *w) +{ + int j, j0, j1, j2, j3, k, m, mh; + double wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i, + wd1r, wd1i, wd3r, wd3i; + double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, + y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i; + + mh = n >> 3; + m = 2 * mh; + j1 = m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[0] + a[j2]; + x0i = -a[1] - a[j2 + 1]; + x1r = a[0] - a[j2]; + x1i = -a[1] + a[j2 + 1]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + a[0] = x0r + x2r; + a[1] = x0i - x2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i + x2i; + a[j2] = x1r + x3i; + a[j2 + 1] = x1i + x3r; + a[j3] = x1r - x3i; + a[j3 + 1] = x1i - x3r; + wn4r = w[1]; + csc1 = w[2]; + csc3 = w[3]; + wd1r = 1; + wd1i = 0; + wd3r = 1; + wd3i = 0; + k = 0; + for (j = 2; j < mh - 2; j += 4) { + k += 4; + wk1r = csc1 * (wd1r + w[k]); + wk1i = csc1 * (wd1i + w[k + 1]); + wk3r = csc3 * (wd3r + w[k + 2]); + wk3i = csc3 * (wd3i + w[k + 3]); + wd1r = w[k]; + wd1i = w[k + 1]; + wd3r = w[k + 2]; + wd3i = w[k + 3]; + j1 = j + m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[j] + a[j2]; + x0i = -a[j + 1] - a[j2 + 1]; + x1r = a[j] - a[j2]; + x1i = -a[j + 1] + a[j2 + 1]; + y0r = a[j + 2] + a[j2 + 2]; + y0i = -a[j + 3] - a[j2 + 3]; + y1r = a[j + 2] - a[j2 + 2]; + y1i = -a[j + 3] + a[j2 + 3]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + y2r = a[j1 + 2] + a[j3 + 2]; + y2i = a[j1 + 3] + a[j3 + 3]; + y3r = a[j1 + 2] - a[j3 + 2]; + y3i = a[j1 + 3] - a[j3 + 3]; + a[j] = x0r + x2r; + a[j + 1] = x0i - x2i; + a[j + 2] = y0r + y2r; + a[j + 3] = y0i - y2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i + x2i; + a[j1 + 2] = y0r - y2r; + a[j1 + 3] = y0i + y2i; + x0r = x1r + x3i; + x0i = x1i + x3r; + a[j2] = wk1r * x0r - wk1i * x0i; + a[j2 + 1] = wk1r * x0i + wk1i * x0r; + x0r = y1r + y3i; + x0i = y1i + y3r; + a[j2 + 2] = wd1r * x0r - wd1i * x0i; + a[j2 + 3] = wd1r * x0i + wd1i * x0r; + x0r = x1r - x3i; + x0i = x1i - x3r; + a[j3] = wk3r * x0r + wk3i * x0i; + a[j3 + 1] = wk3r * x0i - wk3i * x0r; + x0r = y1r - y3i; + x0i = y1i - y3r; + a[j3 + 2] = wd3r * x0r + wd3i * x0i; + a[j3 + 3] = wd3r * x0i - wd3i * x0r; + j0 = m - j; + j1 = j0 + m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[j0] + a[j2]; + x0i = -a[j0 + 1] - a[j2 + 1]; + x1r = a[j0] - a[j2]; + x1i = -a[j0 + 1] + a[j2 + 1]; + y0r = a[j0 - 2] + a[j2 - 2]; + y0i = -a[j0 - 1] - a[j2 - 1]; + y1r = a[j0 - 2] - a[j2 - 2]; + y1i = -a[j0 - 1] + a[j2 - 1]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + y2r = a[j1 - 2] + a[j3 - 2]; + y2i = a[j1 - 1] + a[j3 - 1]; + y3r = a[j1 - 2] - a[j3 - 2]; + y3i = a[j1 - 1] - a[j3 - 1]; + a[j0] = x0r + x2r; + a[j0 + 1] = x0i - x2i; + a[j0 - 2] = y0r + y2r; + a[j0 - 1] = y0i - y2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i + x2i; + a[j1 - 2] = y0r - y2r; + a[j1 - 1] = y0i + y2i; + x0r = x1r + x3i; + x0i = x1i + x3r; + a[j2] = wk1i * x0r - wk1r * x0i; + a[j2 + 1] = wk1i * x0i + wk1r * x0r; + x0r = y1r + y3i; + x0i = y1i + y3r; + a[j2 - 2] = wd1i * x0r - wd1r * x0i; + a[j2 - 1] = wd1i * x0i + wd1r * x0r; + x0r = x1r - x3i; + x0i = x1i - x3r; + a[j3] = wk3i * x0r + wk3r * x0i; + a[j3 + 1] = wk3i * x0i - wk3r * x0r; + x0r = y1r - y3i; + x0i = y1i - y3r; + a[j3 - 2] = wd3i * x0r + wd3r * x0i; + a[j3 - 1] = wd3i * x0i - wd3r * x0r; + } + wk1r = csc1 * (wd1r + wn4r); + wk1i = csc1 * (wd1i + wn4r); + wk3r = csc3 * (wd3r - wn4r); + wk3i = csc3 * (wd3i - wn4r); + j0 = mh; + j1 = j0 + m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[j0 - 2] + a[j2 - 2]; + x0i = -a[j0 - 1] - a[j2 - 1]; + x1r = a[j0 - 2] - a[j2 - 2]; + x1i = -a[j0 - 1] + a[j2 - 1]; + x2r = a[j1 - 2] + a[j3 - 2]; + x2i = a[j1 - 1] + a[j3 - 1]; + x3r = a[j1 - 2] - a[j3 - 2]; + x3i = a[j1 - 1] - a[j3 - 1]; + a[j0 - 2] = x0r + x2r; + a[j0 - 1] = x0i - x2i; + a[j1 - 2] = x0r - x2r; + a[j1 - 1] = x0i + x2i; + x0r = x1r + x3i; + x0i = x1i + x3r; + a[j2 - 2] = wk1r * x0r - wk1i * x0i; + a[j2 - 1] = wk1r * x0i + wk1i * x0r; + x0r = x1r - x3i; + x0i = x1i - x3r; + a[j3 - 2] = wk3r * x0r + wk3i * x0i; + a[j3 - 1] = wk3r * x0i - wk3i * x0r; + x0r = a[j0] + a[j2]; + x0i = -a[j0 + 1] - a[j2 + 1]; + x1r = a[j0] - a[j2]; + x1i = -a[j0 + 1] + a[j2 + 1]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + a[j0] = x0r + x2r; + a[j0 + 1] = x0i - x2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i + x2i; + x0r = x1r + x3i; + x0i = x1i + x3r; + a[j2] = wn4r * (x0r - x0i); + a[j2 + 1] = wn4r * (x0i + x0r); + x0r = x1r - x3i; + x0i = x1i - x3r; + a[j3] = -wn4r * (x0r + x0i); + a[j3 + 1] = -wn4r * (x0i - x0r); + x0r = a[j0 + 2] + a[j2 + 2]; + x0i = -a[j0 + 3] - a[j2 + 3]; + x1r = a[j0 + 2] - a[j2 + 2]; + x1i = -a[j0 + 3] + a[j2 + 3]; + x2r = a[j1 + 2] + a[j3 + 2]; + x2i = a[j1 + 3] + a[j3 + 3]; + x3r = a[j1 + 2] - a[j3 + 2]; + x3i = a[j1 + 3] - a[j3 + 3]; + a[j0 + 2] = x0r + x2r; + a[j0 + 3] = x0i - x2i; + a[j1 + 2] = x0r - x2r; + a[j1 + 3] = x0i + x2i; + x0r = x1r + x3i; + x0i = x1i + x3r; + a[j2 + 2] = wk1i * x0r - wk1r * x0i; + a[j2 + 3] = wk1i * x0i + wk1r * x0r; + x0r = x1r - x3i; + x0i = x1i - x3r; + a[j3 + 2] = wk3i * x0r + wk3r * x0i; + a[j3 + 3] = wk3i * x0i - wk3r * x0r; +} + + +#ifdef USE_CDFT_THREADS +struct cdft_arg_st { + int n0; + int n; + double *a; + int nw; + double *w; +}; +typedef struct cdft_arg_st cdft_arg_t; + + +void cftrec4_th(int n, double *a, int nw, double *w) +{ + void *cftrec1_th(void *p); + void *cftrec2_th(void *p); + int i, idiv4, m, nthread; + cdft_thread_t th[4]; + cdft_arg_t ag[4]; + + nthread = 2; + idiv4 = 0; + m = n >> 1; + if (n > CDFT_4THREADS_BEGIN_N) { + nthread = 4; + idiv4 = 1; + m >>= 1; + } + for (i = 0; i < nthread; i++) { + ag[i].n0 = n; + ag[i].n = m; + ag[i].a = &a[i * m]; + ag[i].nw = nw; + ag[i].w = w; + if (i != idiv4) { + cdft_thread_create(&th[i], cftrec1_th, &ag[i]); + } else { + cdft_thread_create(&th[i], cftrec2_th, &ag[i]); + } + } + for (i = 0; i < nthread; i++) { + cdft_thread_wait(th[i]); + } +} + + +void *cftrec1_th(void *p) +{ + int cfttree(int n, int j, int k, double *a, int nw, double *w); + void cftleaf(int n, int isplt, double *a, int nw, double *w); + void cftmdl1(int n, double *a, double *w); + int isplt, j, k, m, n, n0, nw; + double *a, *w; + + n0 = ((cdft_arg_t *) p)->n0; + n = ((cdft_arg_t *) p)->n; + a = ((cdft_arg_t *) p)->a; + nw = ((cdft_arg_t *) p)->nw; + w = ((cdft_arg_t *) p)->w; + m = n0; + while (m > 512) { + m >>= 2; + cftmdl1(m, &a[n - m], &w[nw - (m >> 1)]); + } + cftleaf(m, 1, &a[n - m], nw, w); + k = 0; + for (j = n - m; j > 0; j -= m) { + k++; + isplt = cfttree(m, j, k, a, nw, w); + cftleaf(m, isplt, &a[j - m], nw, w); + } + return (void *) 0; +} + + +void *cftrec2_th(void *p) +{ + int cfttree(int n, int j, int k, double *a, int nw, double *w); + void cftleaf(int n, int isplt, double *a, int nw, double *w); + void cftmdl2(int n, double *a, double *w); + int isplt, j, k, m, n, n0, nw; + double *a, *w; + + n0 = ((cdft_arg_t *) p)->n0; + n = ((cdft_arg_t *) p)->n; + a = ((cdft_arg_t *) p)->a; + nw = ((cdft_arg_t *) p)->nw; + w = ((cdft_arg_t *) p)->w; + k = 1; + m = n0; + while (m > 512) { + m >>= 2; + k <<= 2; + cftmdl2(m, &a[n - m], &w[nw - m]); + } + cftleaf(m, 0, &a[n - m], nw, w); + k >>= 1; + for (j = n - m; j > 0; j -= m) { + k++; + isplt = cfttree(m, j, k, a, nw, w); + cftleaf(m, isplt, &a[j - m], nw, w); + } + return (void *) 0; +} +#endif /* USE_CDFT_THREADS */ + + +void cftrec4(int n, double *a, int nw, double *w) +{ + int cfttree(int n, int j, int k, double *a, int nw, double *w); + void cftleaf(int n, int isplt, double *a, int nw, double *w); + void cftmdl1(int n, double *a, double *w); + int isplt, j, k, m; + + m = n; + while (m > 512) { + m >>= 2; + cftmdl1(m, &a[n - m], &w[nw - (m >> 1)]); + } + cftleaf(m, 1, &a[n - m], nw, w); + k = 0; + for (j = n - m; j > 0; j -= m) { + k++; + isplt = cfttree(m, j, k, a, nw, w); + cftleaf(m, isplt, &a[j - m], nw, w); + } +} + + +int cfttree(int n, int j, int k, double *a, int nw, double *w) +{ + void cftmdl1(int n, double *a, double *w); + void cftmdl2(int n, double *a, double *w); + int i, isplt, m; + + if ((k & 3) != 0) { + isplt = k & 1; + if (isplt != 0) { + cftmdl1(n, &a[j - n], &w[nw - (n >> 1)]); + } else { + cftmdl2(n, &a[j - n], &w[nw - n]); + } + } else { + m = n; + for (i = k; (i & 3) == 0; i >>= 2) { + m <<= 2; + } + isplt = i & 1; + if (isplt != 0) { + while (m > 128) { + cftmdl1(m, &a[j - m], &w[nw - (m >> 1)]); + m >>= 2; + } + } else { + while (m > 128) { + cftmdl2(m, &a[j - m], &w[nw - m]); + m >>= 2; + } + } + } + return isplt; +} + + +void cftleaf(int n, int isplt, double *a, int nw, double *w) +{ + void cftmdl1(int n, double *a, double *w); + void cftmdl2(int n, double *a, double *w); + void cftf161(double *a, double *w); + void cftf162(double *a, double *w); + void cftf081(double *a, double *w); + void cftf082(double *a, double *w); + + if (n == 512) { + cftmdl1(128, a, &w[nw - 64]); + cftf161(a, &w[nw - 8]); + cftf162(&a[32], &w[nw - 32]); + cftf161(&a[64], &w[nw - 8]); + cftf161(&a[96], &w[nw - 8]); + cftmdl2(128, &a[128], &w[nw - 128]); + cftf161(&a[128], &w[nw - 8]); + cftf162(&a[160], &w[nw - 32]); + cftf161(&a[192], &w[nw - 8]); + cftf162(&a[224], &w[nw - 32]); + cftmdl1(128, &a[256], &w[nw - 64]); + cftf161(&a[256], &w[nw - 8]); + cftf162(&a[288], &w[nw - 32]); + cftf161(&a[320], &w[nw - 8]); + cftf161(&a[352], &w[nw - 8]); + if (isplt != 0) { + cftmdl1(128, &a[384], &w[nw - 64]); + cftf161(&a[480], &w[nw - 8]); + } else { + cftmdl2(128, &a[384], &w[nw - 128]); + cftf162(&a[480], &w[nw - 32]); + } + cftf161(&a[384], &w[nw - 8]); + cftf162(&a[416], &w[nw - 32]); + cftf161(&a[448], &w[nw - 8]); + } else { + cftmdl1(64, a, &w[nw - 32]); + cftf081(a, &w[nw - 8]); + cftf082(&a[16], &w[nw - 8]); + cftf081(&a[32], &w[nw - 8]); + cftf081(&a[48], &w[nw - 8]); + cftmdl2(64, &a[64], &w[nw - 64]); + cftf081(&a[64], &w[nw - 8]); + cftf082(&a[80], &w[nw - 8]); + cftf081(&a[96], &w[nw - 8]); + cftf082(&a[112], &w[nw - 8]); + cftmdl1(64, &a[128], &w[nw - 32]); + cftf081(&a[128], &w[nw - 8]); + cftf082(&a[144], &w[nw - 8]); + cftf081(&a[160], &w[nw - 8]); + cftf081(&a[176], &w[nw - 8]); + if (isplt != 0) { + cftmdl1(64, &a[192], &w[nw - 32]); + cftf081(&a[240], &w[nw - 8]); + } else { + cftmdl2(64, &a[192], &w[nw - 64]); + cftf082(&a[240], &w[nw - 8]); + } + cftf081(&a[192], &w[nw - 8]); + cftf082(&a[208], &w[nw - 8]); + cftf081(&a[224], &w[nw - 8]); + } +} + + +void cftmdl1(int n, double *a, double *w) +{ + int j, j0, j1, j2, j3, k, m, mh; + double wn4r, wk1r, wk1i, wk3r, wk3i; + double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + mh = n >> 3; + m = 2 * mh; + j1 = m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[0] + a[j2]; + x0i = a[1] + a[j2 + 1]; + x1r = a[0] - a[j2]; + x1i = a[1] - a[j2 + 1]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + a[0] = x0r + x2r; + a[1] = x0i + x2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i - x2i; + a[j2] = x1r - x3i; + a[j2 + 1] = x1i + x3r; + a[j3] = x1r + x3i; + a[j3 + 1] = x1i - x3r; + wn4r = w[1]; + k = 0; + for (j = 2; j < mh; j += 2) { + k += 4; + wk1r = w[k]; + wk1i = w[k + 1]; + wk3r = w[k + 2]; + wk3i = w[k + 3]; + j1 = j + m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[j] + a[j2]; + x0i = a[j + 1] + a[j2 + 1]; + x1r = a[j] - a[j2]; + x1i = a[j + 1] - a[j2 + 1]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i - x2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j2] = wk1r * x0r - wk1i * x0i; + a[j2 + 1] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3] = wk3r * x0r + wk3i * x0i; + a[j3 + 1] = wk3r * x0i - wk3i * x0r; + j0 = m - j; + j1 = j0 + m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[j0] + a[j2]; + x0i = a[j0 + 1] + a[j2 + 1]; + x1r = a[j0] - a[j2]; + x1i = a[j0 + 1] - a[j2 + 1]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + a[j0] = x0r + x2r; + a[j0 + 1] = x0i + x2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i - x2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j2] = wk1i * x0r - wk1r * x0i; + a[j2 + 1] = wk1i * x0i + wk1r * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3] = wk3i * x0r + wk3r * x0i; + a[j3 + 1] = wk3i * x0i - wk3r * x0r; + } + j0 = mh; + j1 = j0 + m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[j0] + a[j2]; + x0i = a[j0 + 1] + a[j2 + 1]; + x1r = a[j0] - a[j2]; + x1i = a[j0 + 1] - a[j2 + 1]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + a[j0] = x0r + x2r; + a[j0 + 1] = x0i + x2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i - x2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j2] = wn4r * (x0r - x0i); + a[j2 + 1] = wn4r * (x0i + x0r); + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3] = -wn4r * (x0r + x0i); + a[j3 + 1] = -wn4r * (x0i - x0r); +} + + +void cftmdl2(int n, double *a, double *w) +{ + int j, j0, j1, j2, j3, k, kr, m, mh; + double wn4r, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i; + double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y2r, y2i; + + mh = n >> 3; + m = 2 * mh; + wn4r = w[1]; + j1 = m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[0] - a[j2 + 1]; + x0i = a[1] + a[j2]; + x1r = a[0] + a[j2 + 1]; + x1i = a[1] - a[j2]; + x2r = a[j1] - a[j3 + 1]; + x2i = a[j1 + 1] + a[j3]; + x3r = a[j1] + a[j3 + 1]; + x3i = a[j1 + 1] - a[j3]; + y0r = wn4r * (x2r - x2i); + y0i = wn4r * (x2i + x2r); + a[0] = x0r + y0r; + a[1] = x0i + y0i; + a[j1] = x0r - y0r; + a[j1 + 1] = x0i - y0i; + y0r = wn4r * (x3r - x3i); + y0i = wn4r * (x3i + x3r); + a[j2] = x1r - y0i; + a[j2 + 1] = x1i + y0r; + a[j3] = x1r + y0i; + a[j3 + 1] = x1i - y0r; + k = 0; + kr = 2 * m; + for (j = 2; j < mh; j += 2) { + k += 4; + wk1r = w[k]; + wk1i = w[k + 1]; + wk3r = w[k + 2]; + wk3i = w[k + 3]; + kr -= 4; + wd1i = w[kr]; + wd1r = w[kr + 1]; + wd3i = w[kr + 2]; + wd3r = w[kr + 3]; + j1 = j + m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[j] - a[j2 + 1]; + x0i = a[j + 1] + a[j2]; + x1r = a[j] + a[j2 + 1]; + x1i = a[j + 1] - a[j2]; + x2r = a[j1] - a[j3 + 1]; + x2i = a[j1 + 1] + a[j3]; + x3r = a[j1] + a[j3 + 1]; + x3i = a[j1 + 1] - a[j3]; + y0r = wk1r * x0r - wk1i * x0i; + y0i = wk1r * x0i + wk1i * x0r; + y2r = wd1r * x2r - wd1i * x2i; + y2i = wd1r * x2i + wd1i * x2r; + a[j] = y0r + y2r; + a[j + 1] = y0i + y2i; + a[j1] = y0r - y2r; + a[j1 + 1] = y0i - y2i; + y0r = wk3r * x1r + wk3i * x1i; + y0i = wk3r * x1i - wk3i * x1r; + y2r = wd3r * x3r + wd3i * x3i; + y2i = wd3r * x3i - wd3i * x3r; + a[j2] = y0r + y2r; + a[j2 + 1] = y0i + y2i; + a[j3] = y0r - y2r; + a[j3 + 1] = y0i - y2i; + j0 = m - j; + j1 = j0 + m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[j0] - a[j2 + 1]; + x0i = a[j0 + 1] + a[j2]; + x1r = a[j0] + a[j2 + 1]; + x1i = a[j0 + 1] - a[j2]; + x2r = a[j1] - a[j3 + 1]; + x2i = a[j1 + 1] + a[j3]; + x3r = a[j1] + a[j3 + 1]; + x3i = a[j1 + 1] - a[j3]; + y0r = wd1i * x0r - wd1r * x0i; + y0i = wd1i * x0i + wd1r * x0r; + y2r = wk1i * x2r - wk1r * x2i; + y2i = wk1i * x2i + wk1r * x2r; + a[j0] = y0r + y2r; + a[j0 + 1] = y0i + y2i; + a[j1] = y0r - y2r; + a[j1 + 1] = y0i - y2i; + y0r = wd3i * x1r + wd3r * x1i; + y0i = wd3i * x1i - wd3r * x1r; + y2r = wk3i * x3r + wk3r * x3i; + y2i = wk3i * x3i - wk3r * x3r; + a[j2] = y0r + y2r; + a[j2 + 1] = y0i + y2i; + a[j3] = y0r - y2r; + a[j3 + 1] = y0i - y2i; + } + wk1r = w[m]; + wk1i = w[m + 1]; + j0 = mh; + j1 = j0 + m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[j0] - a[j2 + 1]; + x0i = a[j0 + 1] + a[j2]; + x1r = a[j0] + a[j2 + 1]; + x1i = a[j0 + 1] - a[j2]; + x2r = a[j1] - a[j3 + 1]; + x2i = a[j1 + 1] + a[j3]; + x3r = a[j1] + a[j3 + 1]; + x3i = a[j1 + 1] - a[j3]; + y0r = wk1r * x0r - wk1i * x0i; + y0i = wk1r * x0i + wk1i * x0r; + y2r = wk1i * x2r - wk1r * x2i; + y2i = wk1i * x2i + wk1r * x2r; + a[j0] = y0r + y2r; + a[j0 + 1] = y0i + y2i; + a[j1] = y0r - y2r; + a[j1 + 1] = y0i - y2i; + y0r = wk1i * x1r - wk1r * x1i; + y0i = wk1i * x1i + wk1r * x1r; + y2r = wk1r * x3r - wk1i * x3i; + y2i = wk1r * x3i + wk1i * x3r; + a[j2] = y0r - y2r; + a[j2 + 1] = y0i - y2i; + a[j3] = y0r + y2r; + a[j3 + 1] = y0i + y2i; +} + + +void cftfx41(int n, double *a, int nw, double *w) +{ + void cftf161(double *a, double *w); + void cftf162(double *a, double *w); + void cftf081(double *a, double *w); + void cftf082(double *a, double *w); + + if (n == 128) { + cftf161(a, &w[nw - 8]); + cftf162(&a[32], &w[nw - 32]); + cftf161(&a[64], &w[nw - 8]); + cftf161(&a[96], &w[nw - 8]); + } else { + cftf081(a, &w[nw - 8]); + cftf082(&a[16], &w[nw - 8]); + cftf081(&a[32], &w[nw - 8]); + cftf081(&a[48], &w[nw - 8]); + } +} + + +void cftf161(double *a, double *w) +{ + double wn4r, wk1r, wk1i, + x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, + y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, + y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i, + y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i, + y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i; + + wn4r = w[1]; + wk1r = w[2]; + wk1i = w[3]; + x0r = a[0] + a[16]; + x0i = a[1] + a[17]; + x1r = a[0] - a[16]; + x1i = a[1] - a[17]; + x2r = a[8] + a[24]; + x2i = a[9] + a[25]; + x3r = a[8] - a[24]; + x3i = a[9] - a[25]; + y0r = x0r + x2r; + y0i = x0i + x2i; + y4r = x0r - x2r; + y4i = x0i - x2i; + y8r = x1r - x3i; + y8i = x1i + x3r; + y12r = x1r + x3i; + y12i = x1i - x3r; + x0r = a[2] + a[18]; + x0i = a[3] + a[19]; + x1r = a[2] - a[18]; + x1i = a[3] - a[19]; + x2r = a[10] + a[26]; + x2i = a[11] + a[27]; + x3r = a[10] - a[26]; + x3i = a[11] - a[27]; + y1r = x0r + x2r; + y1i = x0i + x2i; + y5r = x0r - x2r; + y5i = x0i - x2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + y9r = wk1r * x0r - wk1i * x0i; + y9i = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + y13r = wk1i * x0r - wk1r * x0i; + y13i = wk1i * x0i + wk1r * x0r; + x0r = a[4] + a[20]; + x0i = a[5] + a[21]; + x1r = a[4] - a[20]; + x1i = a[5] - a[21]; + x2r = a[12] + a[28]; + x2i = a[13] + a[29]; + x3r = a[12] - a[28]; + x3i = a[13] - a[29]; + y2r = x0r + x2r; + y2i = x0i + x2i; + y6r = x0r - x2r; + y6i = x0i - x2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + y10r = wn4r * (x0r - x0i); + y10i = wn4r * (x0i + x0r); + x0r = x1r + x3i; + x0i = x1i - x3r; + y14r = wn4r * (x0r + x0i); + y14i = wn4r * (x0i - x0r); + x0r = a[6] + a[22]; + x0i = a[7] + a[23]; + x1r = a[6] - a[22]; + x1i = a[7] - a[23]; + x2r = a[14] + a[30]; + x2i = a[15] + a[31]; + x3r = a[14] - a[30]; + x3i = a[15] - a[31]; + y3r = x0r + x2r; + y3i = x0i + x2i; + y7r = x0r - x2r; + y7i = x0i - x2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + y11r = wk1i * x0r - wk1r * x0i; + y11i = wk1i * x0i + wk1r * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + y15r = wk1r * x0r - wk1i * x0i; + y15i = wk1r * x0i + wk1i * x0r; + x0r = y12r - y14r; + x0i = y12i - y14i; + x1r = y12r + y14r; + x1i = y12i + y14i; + x2r = y13r - y15r; + x2i = y13i - y15i; + x3r = y13r + y15r; + x3i = y13i + y15i; + a[24] = x0r + x2r; + a[25] = x0i + x2i; + a[26] = x0r - x2r; + a[27] = x0i - x2i; + a[28] = x1r - x3i; + a[29] = x1i + x3r; + a[30] = x1r + x3i; + a[31] = x1i - x3r; + x0r = y8r + y10r; + x0i = y8i + y10i; + x1r = y8r - y10r; + x1i = y8i - y10i; + x2r = y9r + y11r; + x2i = y9i + y11i; + x3r = y9r - y11r; + x3i = y9i - y11i; + a[16] = x0r + x2r; + a[17] = x0i + x2i; + a[18] = x0r - x2r; + a[19] = x0i - x2i; + a[20] = x1r - x3i; + a[21] = x1i + x3r; + a[22] = x1r + x3i; + a[23] = x1i - x3r; + x0r = y5r - y7i; + x0i = y5i + y7r; + x2r = wn4r * (x0r - x0i); + x2i = wn4r * (x0i + x0r); + x0r = y5r + y7i; + x0i = y5i - y7r; + x3r = wn4r * (x0r - x0i); + x3i = wn4r * (x0i + x0r); + x0r = y4r - y6i; + x0i = y4i + y6r; + x1r = y4r + y6i; + x1i = y4i - y6r; + a[8] = x0r + x2r; + a[9] = x0i + x2i; + a[10] = x0r - x2r; + a[11] = x0i - x2i; + a[12] = x1r - x3i; + a[13] = x1i + x3r; + a[14] = x1r + x3i; + a[15] = x1i - x3r; + x0r = y0r + y2r; + x0i = y0i + y2i; + x1r = y0r - y2r; + x1i = y0i - y2i; + x2r = y1r + y3r; + x2i = y1i + y3i; + x3r = y1r - y3r; + x3i = y1i - y3i; + a[0] = x0r + x2r; + a[1] = x0i + x2i; + a[2] = x0r - x2r; + a[3] = x0i - x2i; + a[4] = x1r - x3i; + a[5] = x1i + x3r; + a[6] = x1r + x3i; + a[7] = x1i - x3r; +} + + +void cftf162(double *a, double *w) +{ + double wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i, + x0r, x0i, x1r, x1i, x2r, x2i, + y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, + y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i, + y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i, + y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i; + + wn4r = w[1]; + wk1r = w[4]; + wk1i = w[5]; + wk3r = w[6]; + wk3i = -w[7]; + wk2r = w[8]; + wk2i = w[9]; + x1r = a[0] - a[17]; + x1i = a[1] + a[16]; + x0r = a[8] - a[25]; + x0i = a[9] + a[24]; + x2r = wn4r * (x0r - x0i); + x2i = wn4r * (x0i + x0r); + y0r = x1r + x2r; + y0i = x1i + x2i; + y4r = x1r - x2r; + y4i = x1i - x2i; + x1r = a[0] + a[17]; + x1i = a[1] - a[16]; + x0r = a[8] + a[25]; + x0i = a[9] - a[24]; + x2r = wn4r * (x0r - x0i); + x2i = wn4r * (x0i + x0r); + y8r = x1r - x2i; + y8i = x1i + x2r; + y12r = x1r + x2i; + y12i = x1i - x2r; + x0r = a[2] - a[19]; + x0i = a[3] + a[18]; + x1r = wk1r * x0r - wk1i * x0i; + x1i = wk1r * x0i + wk1i * x0r; + x0r = a[10] - a[27]; + x0i = a[11] + a[26]; + x2r = wk3i * x0r - wk3r * x0i; + x2i = wk3i * x0i + wk3r * x0r; + y1r = x1r + x2r; + y1i = x1i + x2i; + y5r = x1r - x2r; + y5i = x1i - x2i; + x0r = a[2] + a[19]; + x0i = a[3] - a[18]; + x1r = wk3r * x0r - wk3i * x0i; + x1i = wk3r * x0i + wk3i * x0r; + x0r = a[10] + a[27]; + x0i = a[11] - a[26]; + x2r = wk1r * x0r + wk1i * x0i; + x2i = wk1r * x0i - wk1i * x0r; + y9r = x1r - x2r; + y9i = x1i - x2i; + y13r = x1r + x2r; + y13i = x1i + x2i; + x0r = a[4] - a[21]; + x0i = a[5] + a[20]; + x1r = wk2r * x0r - wk2i * x0i; + x1i = wk2r * x0i + wk2i * x0r; + x0r = a[12] - a[29]; + x0i = a[13] + a[28]; + x2r = wk2i * x0r - wk2r * x0i; + x2i = wk2i * x0i + wk2r * x0r; + y2r = x1r + x2r; + y2i = x1i + x2i; + y6r = x1r - x2r; + y6i = x1i - x2i; + x0r = a[4] + a[21]; + x0i = a[5] - a[20]; + x1r = wk2i * x0r - wk2r * x0i; + x1i = wk2i * x0i + wk2r * x0r; + x0r = a[12] + a[29]; + x0i = a[13] - a[28]; + x2r = wk2r * x0r - wk2i * x0i; + x2i = wk2r * x0i + wk2i * x0r; + y10r = x1r - x2r; + y10i = x1i - x2i; + y14r = x1r + x2r; + y14i = x1i + x2i; + x0r = a[6] - a[23]; + x0i = a[7] + a[22]; + x1r = wk3r * x0r - wk3i * x0i; + x1i = wk3r * x0i + wk3i * x0r; + x0r = a[14] - a[31]; + x0i = a[15] + a[30]; + x2r = wk1i * x0r - wk1r * x0i; + x2i = wk1i * x0i + wk1r * x0r; + y3r = x1r + x2r; + y3i = x1i + x2i; + y7r = x1r - x2r; + y7i = x1i - x2i; + x0r = a[6] + a[23]; + x0i = a[7] - a[22]; + x1r = wk1i * x0r + wk1r * x0i; + x1i = wk1i * x0i - wk1r * x0r; + x0r = a[14] + a[31]; + x0i = a[15] - a[30]; + x2r = wk3i * x0r - wk3r * x0i; + x2i = wk3i * x0i + wk3r * x0r; + y11r = x1r + x2r; + y11i = x1i + x2i; + y15r = x1r - x2r; + y15i = x1i - x2i; + x1r = y0r + y2r; + x1i = y0i + y2i; + x2r = y1r + y3r; + x2i = y1i + y3i; + a[0] = x1r + x2r; + a[1] = x1i + x2i; + a[2] = x1r - x2r; + a[3] = x1i - x2i; + x1r = y0r - y2r; + x1i = y0i - y2i; + x2r = y1r - y3r; + x2i = y1i - y3i; + a[4] = x1r - x2i; + a[5] = x1i + x2r; + a[6] = x1r + x2i; + a[7] = x1i - x2r; + x1r = y4r - y6i; + x1i = y4i + y6r; + x0r = y5r - y7i; + x0i = y5i + y7r; + x2r = wn4r * (x0r - x0i); + x2i = wn4r * (x0i + x0r); + a[8] = x1r + x2r; + a[9] = x1i + x2i; + a[10] = x1r - x2r; + a[11] = x1i - x2i; + x1r = y4r + y6i; + x1i = y4i - y6r; + x0r = y5r + y7i; + x0i = y5i - y7r; + x2r = wn4r * (x0r - x0i); + x2i = wn4r * (x0i + x0r); + a[12] = x1r - x2i; + a[13] = x1i + x2r; + a[14] = x1r + x2i; + a[15] = x1i - x2r; + x1r = y8r + y10r; + x1i = y8i + y10i; + x2r = y9r - y11r; + x2i = y9i - y11i; + a[16] = x1r + x2r; + a[17] = x1i + x2i; + a[18] = x1r - x2r; + a[19] = x1i - x2i; + x1r = y8r - y10r; + x1i = y8i - y10i; + x2r = y9r + y11r; + x2i = y9i + y11i; + a[20] = x1r - x2i; + a[21] = x1i + x2r; + a[22] = x1r + x2i; + a[23] = x1i - x2r; + x1r = y12r - y14i; + x1i = y12i + y14r; + x0r = y13r + y15i; + x0i = y13i - y15r; + x2r = wn4r * (x0r - x0i); + x2i = wn4r * (x0i + x0r); + a[24] = x1r + x2r; + a[25] = x1i + x2i; + a[26] = x1r - x2r; + a[27] = x1i - x2i; + x1r = y12r + y14i; + x1i = y12i - y14r; + x0r = y13r - y15i; + x0i = y13i + y15r; + x2r = wn4r * (x0r - x0i); + x2i = wn4r * (x0i + x0r); + a[28] = x1r - x2i; + a[29] = x1i + x2r; + a[30] = x1r + x2i; + a[31] = x1i - x2r; +} + + +void cftf081(double *a, double *w) +{ + double wn4r, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, + y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, + y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i; + + wn4r = w[1]; + x0r = a[0] + a[8]; + x0i = a[1] + a[9]; + x1r = a[0] - a[8]; + x1i = a[1] - a[9]; + x2r = a[4] + a[12]; + x2i = a[5] + a[13]; + x3r = a[4] - a[12]; + x3i = a[5] - a[13]; + y0r = x0r + x2r; + y0i = x0i + x2i; + y2r = x0r - x2r; + y2i = x0i - x2i; + y1r = x1r - x3i; + y1i = x1i + x3r; + y3r = x1r + x3i; + y3i = x1i - x3r; + x0r = a[2] + a[10]; + x0i = a[3] + a[11]; + x1r = a[2] - a[10]; + x1i = a[3] - a[11]; + x2r = a[6] + a[14]; + x2i = a[7] + a[15]; + x3r = a[6] - a[14]; + x3i = a[7] - a[15]; + y4r = x0r + x2r; + y4i = x0i + x2i; + y6r = x0r - x2r; + y6i = x0i - x2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + x2r = x1r + x3i; + x2i = x1i - x3r; + y5r = wn4r * (x0r - x0i); + y5i = wn4r * (x0r + x0i); + y7r = wn4r * (x2r - x2i); + y7i = wn4r * (x2r + x2i); + a[8] = y1r + y5r; + a[9] = y1i + y5i; + a[10] = y1r - y5r; + a[11] = y1i - y5i; + a[12] = y3r - y7i; + a[13] = y3i + y7r; + a[14] = y3r + y7i; + a[15] = y3i - y7r; + a[0] = y0r + y4r; + a[1] = y0i + y4i; + a[2] = y0r - y4r; + a[3] = y0i - y4i; + a[4] = y2r - y6i; + a[5] = y2i + y6r; + a[6] = y2r + y6i; + a[7] = y2i - y6r; +} + + +void cftf082(double *a, double *w) +{ + double wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i, + y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, + y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i; + + wn4r = w[1]; + wk1r = w[2]; + wk1i = w[3]; + y0r = a[0] - a[9]; + y0i = a[1] + a[8]; + y1r = a[0] + a[9]; + y1i = a[1] - a[8]; + x0r = a[4] - a[13]; + x0i = a[5] + a[12]; + y2r = wn4r * (x0r - x0i); + y2i = wn4r * (x0i + x0r); + x0r = a[4] + a[13]; + x0i = a[5] - a[12]; + y3r = wn4r * (x0r - x0i); + y3i = wn4r * (x0i + x0r); + x0r = a[2] - a[11]; + x0i = a[3] + a[10]; + y4r = wk1r * x0r - wk1i * x0i; + y4i = wk1r * x0i + wk1i * x0r; + x0r = a[2] + a[11]; + x0i = a[3] - a[10]; + y5r = wk1i * x0r - wk1r * x0i; + y5i = wk1i * x0i + wk1r * x0r; + x0r = a[6] - a[15]; + x0i = a[7] + a[14]; + y6r = wk1i * x0r - wk1r * x0i; + y6i = wk1i * x0i + wk1r * x0r; + x0r = a[6] + a[15]; + x0i = a[7] - a[14]; + y7r = wk1r * x0r - wk1i * x0i; + y7i = wk1r * x0i + wk1i * x0r; + x0r = y0r + y2r; + x0i = y0i + y2i; + x1r = y4r + y6r; + x1i = y4i + y6i; + a[0] = x0r + x1r; + a[1] = x0i + x1i; + a[2] = x0r - x1r; + a[3] = x0i - x1i; + x0r = y0r - y2r; + x0i = y0i - y2i; + x1r = y4r - y6r; + x1i = y4i - y6i; + a[4] = x0r - x1i; + a[5] = x0i + x1r; + a[6] = x0r + x1i; + a[7] = x0i - x1r; + x0r = y1r - y3i; + x0i = y1i + y3r; + x1r = y5r - y7r; + x1i = y5i - y7i; + a[8] = x0r + x1r; + a[9] = x0i + x1i; + a[10] = x0r - x1r; + a[11] = x0i - x1i; + x0r = y1r + y3i; + x0i = y1i - y3r; + x1r = y5r + y7r; + x1i = y5i + y7i; + a[12] = x0r - x1i; + a[13] = x0i + x1r; + a[14] = x0r + x1i; + a[15] = x0i - x1r; +} + + +void cftf040(double *a) +{ + double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + x0r = a[0] + a[4]; + x0i = a[1] + a[5]; + x1r = a[0] - a[4]; + x1i = a[1] - a[5]; + x2r = a[2] + a[6]; + x2i = a[3] + a[7]; + x3r = a[2] - a[6]; + x3i = a[3] - a[7]; + a[0] = x0r + x2r; + a[1] = x0i + x2i; + a[2] = x1r - x3i; + a[3] = x1i + x3r; + a[4] = x0r - x2r; + a[5] = x0i - x2i; + a[6] = x1r + x3i; + a[7] = x1i - x3r; +} + + +void cftb040(double *a) +{ + double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + x0r = a[0] + a[4]; + x0i = a[1] + a[5]; + x1r = a[0] - a[4]; + x1i = a[1] - a[5]; + x2r = a[2] + a[6]; + x2i = a[3] + a[7]; + x3r = a[2] - a[6]; + x3i = a[3] - a[7]; + a[0] = x0r + x2r; + a[1] = x0i + x2i; + a[2] = x1r + x3i; + a[3] = x1i - x3r; + a[4] = x0r - x2r; + a[5] = x0i - x2i; + a[6] = x1r - x3i; + a[7] = x1i + x3r; +} + + +void cftx020(double *a) +{ + double x0r, x0i; + + x0r = a[0] - a[2]; + x0i = a[1] - a[3]; + a[0] += a[2]; + a[1] += a[3]; + a[2] = x0r; + a[3] = x0i; +} + + +void rftfsub(int n, double *a, int nc, double *c) +{ + int j, k, kk, ks, m; + double wkr, wki, xr, xi, yr, yi; + + m = n >> 1; + ks = 2 * nc / m; + kk = 0; + for (j = 2; j < m; j += 2) { + k = n - j; + kk += ks; + wkr = 0.5 - c[nc - kk]; + wki = c[kk]; + xr = a[j] - a[k]; + xi = a[j + 1] + a[k + 1]; + yr = wkr * xr - wki * xi; + yi = wkr * xi + wki * xr; + a[j] -= yr; + a[j + 1] -= yi; + a[k] += yr; + a[k + 1] -= yi; + } +} + + +void rftbsub(int n, double *a, int nc, double *c) +{ + int j, k, kk, ks, m; + double wkr, wki, xr, xi, yr, yi; + + m = n >> 1; + ks = 2 * nc / m; + kk = 0; + for (j = 2; j < m; j += 2) { + k = n - j; + kk += ks; + wkr = 0.5 - c[nc - kk]; + wki = c[kk]; + xr = a[j] - a[k]; + xi = a[j + 1] + a[k + 1]; + yr = wkr * xr + wki * xi; + yi = wkr * xi - wki * xr; + a[j] -= yr; + a[j + 1] -= yi; + a[k] += yr; + a[k + 1] -= yi; + } +} + + +void dctsub(int n, double *a, int nc, double *c) +{ + int j, k, kk, ks, m; + double wkr, wki, xr; + + m = n >> 1; + ks = nc / n; + kk = 0; + for (j = 1; j < m; j++) { + k = n - j; + kk += ks; + wkr = c[kk] - c[nc - kk]; + wki = c[kk] + c[nc - kk]; + xr = wki * a[j] - wkr * a[k]; + a[j] = wkr * a[j] + wki * a[k]; + a[k] = xr; + } + a[m] *= c[0]; +} + + +void dstsub(int n, double *a, int nc, double *c) +{ + int j, k, kk, ks, m; + double wkr, wki, xr; + + m = n >> 1; + ks = nc / n; + kk = 0; + for (j = 1; j < m; j++) { + k = n - j; + kk += ks; + wkr = c[kk] - c[nc - kk]; + wki = c[kk] + c[nc - kk]; + xr = wki * a[k] - wkr * a[j]; + a[k] = wkr * a[k] + wki * a[j]; + a[j] = xr; + } + a[m] *= c[0]; +} + + +void rdft(int n, int isgn, double *a, int *ip, double *w) +{ + void makewt(int nw, int *ip, double *w); + void makect(int nc, int *ip, double *c); + void cftfsub(int n, double *a, int *ip, int nw, double *w); + void cftbsub(int n, double *a, int *ip, int nw, double *w); + void rftfsub(int n, double *a, int nc, double *c); + void rftbsub(int n, double *a, int nc, double *c); + int nw, nc; + double xi; + + nw = ip[0]; + if (n > (nw << 2)) { + nw = n >> 2; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n > (nc << 2)) { + nc = n >> 2; + makect(nc, ip, w + nw); + } + if (isgn >= 0) { + if (n > 4) { + cftfsub(n, a, ip, nw, w); + rftfsub(n, a, nc, w + nw); + } else if (n == 4) { + cftfsub(n, a, ip, nw, w); + } + xi = a[0] - a[1]; + a[0] += a[1]; + a[1] = xi; + } else { + a[1] = 0.5 * (a[0] - a[1]); + a[0] -= a[1]; + if (n > 4) { + rftbsub(n, a, nc, w + nw); + cftbsub(n, a, ip, nw, w); + } else if (n == 4) { + cftbsub(n, a, ip, nw, w); + } + } +} + + +void ddct(int n, int isgn, double *a, int *ip, double *w) +{ + void makewt(int nw, int *ip, double *w); + void makect(int nc, int *ip, double *c); + void cftfsub(int n, double *a, int *ip, int nw, double *w); + void cftbsub(int n, double *a, int *ip, int nw, double *w); + void rftfsub(int n, double *a, int nc, double *c); + void rftbsub(int n, double *a, int nc, double *c); + void dctsub(int n, double *a, int nc, double *c); + int j, nw, nc; + double xr; + + nw = ip[0]; + if (n > (nw << 2)) { + nw = n >> 2; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n > nc) { + nc = n; + makect(nc, ip, w + nw); + } + if (isgn < 0) { + xr = a[n - 1]; + for (j = n - 2; j >= 2; j -= 2) { + a[j + 1] = a[j] - a[j - 1]; + a[j] += a[j - 1]; + } + a[1] = a[0] - xr; + a[0] += xr; + if (n > 4) { + rftbsub(n, a, nc, w + nw); + cftbsub(n, a, ip, nw, w); + } else if (n == 4) { + cftbsub(n, a, ip, nw, w); + } + } + dctsub(n, a, nc, w + nw); + if (isgn >= 0) { + if (n > 4) { + cftfsub(n, a, ip, nw, w); + rftfsub(n, a, nc, w + nw); + } else if (n == 4) { + cftfsub(n, a, ip, nw, w); + } + xr = a[0] - a[1]; + a[0] += a[1]; + for (j = 2; j < n; j += 2) { + a[j - 1] = a[j] - a[j + 1]; + a[j] += a[j + 1]; + } + a[n - 1] = xr; + } +} + + +void ddst(int n, int isgn, double *a, int *ip, double *w) +{ + void makewt(int nw, int *ip, double *w); + void makect(int nc, int *ip, double *c); + void cftfsub(int n, double *a, int *ip, int nw, double *w); + void cftbsub(int n, double *a, int *ip, int nw, double *w); + void rftfsub(int n, double *a, int nc, double *c); + void rftbsub(int n, double *a, int nc, double *c); + void dstsub(int n, double *a, int nc, double *c); + int j, nw, nc; + double xr; + + nw = ip[0]; + if (n > (nw << 2)) { + nw = n >> 2; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n > nc) { + nc = n; + makect(nc, ip, w + nw); + } + if (isgn < 0) { + xr = a[n - 1]; + for (j = n - 2; j >= 2; j -= 2) { + a[j + 1] = -a[j] - a[j - 1]; + a[j] -= a[j - 1]; + } + a[1] = a[0] + xr; + a[0] -= xr; + if (n > 4) { + rftbsub(n, a, nc, w + nw); + cftbsub(n, a, ip, nw, w); + } else if (n == 4) { + cftbsub(n, a, ip, nw, w); + } + } + dstsub(n, a, nc, w + nw); + if (isgn >= 0) { + if (n > 4) { + cftfsub(n, a, ip, nw, w); + rftfsub(n, a, nc, w + nw); + } else if (n == 4) { + cftfsub(n, a, ip, nw, w); + } + xr = a[0] - a[1]; + a[0] += a[1]; + for (j = 2; j < n; j += 2) { + a[j - 1] = -a[j] - a[j + 1]; + a[j] -= a[j + 1]; + } + a[n - 1] = -xr; + } +} + + +void dfct(int n, double *a, double *t, int *ip, double *w) +{ + void makewt(int nw, int *ip, double *w); + void makect(int nc, int *ip, double *c); + void cftfsub(int n, double *a, int *ip, int nw, double *w); + void rftfsub(int n, double *a, int nc, double *c); + void dctsub(int n, double *a, int nc, double *c); + int j, k, l, m, mh, nw, nc; + double xr, xi, yr, yi; + + nw = ip[0]; + if (n > (nw << 3)) { + nw = n >> 3; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n > (nc << 1)) { + nc = n >> 1; + makect(nc, ip, w + nw); + } + m = n >> 1; + yi = a[m]; + xi = a[0] + a[n]; + a[0] -= a[n]; + t[0] = xi - yi; + t[m] = xi + yi; + if (n > 2) { + mh = m >> 1; + for (j = 1; j < mh; j++) { + k = m - j; + xr = a[j] - a[n - j]; + xi = a[j] + a[n - j]; + yr = a[k] - a[n - k]; + yi = a[k] + a[n - k]; + a[j] = xr; + a[k] = yr; + t[j] = xi - yi; + t[k] = xi + yi; + } + t[mh] = a[mh] + a[n - mh]; + a[mh] -= a[n - mh]; + dctsub(m, a, nc, w + nw); + if (m > 4) { + cftfsub(m, a, ip, nw, w); + rftfsub(m, a, nc, w + nw); + } else if (m == 4) { + cftfsub(m, a, ip, nw, w); + } + a[n - 1] = a[0] - a[1]; + a[1] = a[0] + a[1]; + for (j = m - 2; j >= 2; j -= 2) { + a[2 * j + 1] = a[j] + a[j + 1]; + a[2 * j - 1] = a[j] - a[j + 1]; + } + l = 2; + m = mh; + while (m >= 2) { + dctsub(m, t, nc, w + nw); + if (m > 4) { + cftfsub(m, t, ip, nw, w); + rftfsub(m, t, nc, w + nw); + } else if (m == 4) { + cftfsub(m, t, ip, nw, w); + } + a[n - l] = t[0] - t[1]; + a[l] = t[0] + t[1]; + k = 0; + for (j = 2; j < m; j += 2) { + k += l << 2; + a[k - l] = t[j] - t[j + 1]; + a[k + l] = t[j] + t[j + 1]; + } + l <<= 1; + mh = m >> 1; + for (j = 0; j < mh; j++) { + k = m - j; + t[j] = t[m + k] - t[m + j]; + t[k] = t[m + k] + t[m + j]; + } + t[mh] = t[m + mh]; + m = mh; + } + a[l] = t[0]; + a[n] = t[2] - t[1]; + a[0] = t[2] + t[1]; + } else { + a[1] = a[0]; + a[2] = t[0]; + a[0] = t[1]; + } +} + + +void dfst(int n, double *a, double *t, int *ip, double *w) +{ + void makewt(int nw, int *ip, double *w); + void makect(int nc, int *ip, double *c); + void cftfsub(int n, double *a, int *ip, int nw, double *w); + void rftfsub(int n, double *a, int nc, double *c); + void dstsub(int n, double *a, int nc, double *c); + int j, k, l, m, mh, nw, nc; + double xr, xi, yr, yi; + + nw = ip[0]; + if (n > (nw << 3)) { + nw = n >> 3; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n > (nc << 1)) { + nc = n >> 1; + makect(nc, ip, w + nw); + } + if (n > 2) { + m = n >> 1; + mh = m >> 1; + for (j = 1; j < mh; j++) { + k = m - j; + xr = a[j] + a[n - j]; + xi = a[j] - a[n - j]; + yr = a[k] + a[n - k]; + yi = a[k] - a[n - k]; + a[j] = xr; + a[k] = yr; + t[j] = xi + yi; + t[k] = xi - yi; + } + t[0] = a[mh] - a[n - mh]; + a[mh] += a[n - mh]; + a[0] = a[m]; + dstsub(m, a, nc, w + nw); + if (m > 4) { + cftfsub(m, a, ip, nw, w); + rftfsub(m, a, nc, w + nw); + } else if (m == 4) { + cftfsub(m, a, ip, nw, w); + } + a[n - 1] = a[1] - a[0]; + a[1] = a[0] + a[1]; + for (j = m - 2; j >= 2; j -= 2) { + a[2 * j + 1] = a[j] - a[j + 1]; + a[2 * j - 1] = -a[j] - a[j + 1]; + } + l = 2; + m = mh; + while (m >= 2) { + dstsub(m, t, nc, w + nw); + if (m > 4) { + cftfsub(m, t, ip, nw, w); + rftfsub(m, t, nc, w + nw); + } else if (m == 4) { + cftfsub(m, t, ip, nw, w); + } + a[n - l] = t[1] - t[0]; + a[l] = t[0] + t[1]; + k = 0; + for (j = 2; j < m; j += 2) { + k += l << 2; + a[k - l] = -t[j] - t[j + 1]; + a[k + l] = t[j] - t[j + 1]; + } + l <<= 1; + mh = m >> 1; + for (j = 1; j < mh; j++) { + k = m - j; + t[j] = t[m + k] + t[m + j]; + t[k] = t[m + k] - t[m + j]; + } + t[0] = t[m + mh]; + m = mh; + } + a[l] = t[0]; + } + a[0] = 0; +} + + +/* -------- initializing routines -------- */ + + +#include + +void makewt(int nw, int *ip, double *w) +{ + void makeipt(int nw, int *ip); + int j, nwh, nw0, nw1; + double delta, wn4r, wk1r, wk1i, wk3r, wk3i; + + ip[0] = nw; + ip[1] = 1; + if (nw > 2) { + nwh = nw >> 1; + delta = atan(1.0) / nwh; + wn4r = cos(delta * nwh); + w[0] = 1; + w[1] = wn4r; + if (nwh == 4) { + w[2] = cos(delta * 2); + w[3] = sin(delta * 2); + } else if (nwh > 4) { + makeipt(nw, ip); + w[2] = 0.5 / cos(delta * 2); + w[3] = 0.5 / cos(delta * 6); + for (j = 4; j < nwh; j += 4) { + w[j] = cos(delta * j); + w[j + 1] = sin(delta * j); + w[j + 2] = cos(3 * delta * j); + w[j + 3] = -sin(3 * delta * j); + } + } + nw0 = 0; + while (nwh > 2) { + nw1 = nw0 + nwh; + nwh >>= 1; + w[nw1] = 1; + w[nw1 + 1] = wn4r; + if (nwh == 4) { + wk1r = w[nw0 + 4]; + wk1i = w[nw0 + 5]; + w[nw1 + 2] = wk1r; + w[nw1 + 3] = wk1i; + } else if (nwh > 4) { + wk1r = w[nw0 + 4]; + wk3r = w[nw0 + 6]; + w[nw1 + 2] = 0.5 / wk1r; + w[nw1 + 3] = 0.5 / wk3r; + for (j = 4; j < nwh; j += 4) { + wk1r = w[nw0 + 2 * j]; + wk1i = w[nw0 + 2 * j + 1]; + wk3r = w[nw0 + 2 * j + 2]; + wk3i = w[nw0 + 2 * j + 3]; + w[nw1 + j] = wk1r; + w[nw1 + j + 1] = wk1i; + w[nw1 + j + 2] = wk3r; + w[nw1 + j + 3] = wk3i; + } + } + nw0 = nw1; + } + } +} + + +void makeipt(int nw, int *ip) +{ + int j, l, m, m2, p, q; + + ip[2] = 0; + ip[3] = 16; + m = 2; + for (l = nw; l > 32; l >>= 2) { + m2 = m << 1; + q = m2 << 3; + for (j = m; j < m2; j++) { + p = ip[j] << 2; + ip[m + j] = p; + ip[m2 + j] = p + q; + } + m = m2; + } +} + + +void makect(int nc, int *ip, double *c) +{ + int j, nch; + double delta; + + ip[1] = nc; + if (nc > 1) { + nch = nc >> 1; + delta = atan(1.0) / nch; + c[0] = cos(delta * nch); + c[nch] = 0.5 * c[0]; + for (j = 1; j < nch; j++) { + c[j] = 0.5 * cos(delta * j); + c[nc - j] = 0.5 * sin(delta * j); + } + } +} diff --git a/src/integral_types.h b/src/integral_types.h new file mode 100644 index 0000000..0f7c5a6 --- /dev/null +++ b/src/integral_types.h @@ -0,0 +1,33 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_PLATFORM_DEFAULT_INTEGRAL_TYPES_H_ +#define TENSORFLOW_CORE_PLATFORM_DEFAULT_INTEGRAL_TYPES_H_ + +namespace tensorflow { + +typedef signed char int8; +typedef short int16; +typedef int int32; +typedef long long int64; + +typedef unsigned char uint8; +typedef unsigned short uint16; +typedef unsigned int uint32; +typedef unsigned long long uint64; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_PLATFORM_DEFAULT_INTEGRAL_TYPES_H_ diff --git a/src/main.cc b/src/main.cc new file mode 100644 index 0000000..1e21df0 --- /dev/null +++ b/src/main.cc @@ -0,0 +1,96 @@ +#include +#include + +#include "mfcc.h" +#include "spectrogram.h" + +using namespace std; + +vector > get_audio_spectrogram(vector input, + int window_length, + int step_length) +{ + tensorflow::Spectrogram sgram; + vector > output; + + sgram.Initialize(window_length, step_length); + sgram.ComputeSquaredMagnitudeSpectrogram(input, &output); + + return output; +} + +vector > get_audio_mfcc(vector > spectrogram_output, + int sample_rate = 16000, + int upper_frequency_limit = 4000, + int lower_frequency_limit = 20, + int filterbank_channel_count = 40, + int dct_coefficient_count = 13) +{ + tensorflow::Mfcc mfcc; + vector > mfcc_output(spectrogram_output.size()); + + mfcc.set_upper_frequency_limit(upper_frequency_limit); + mfcc.set_lower_frequency_limit(lower_frequency_limit); + mfcc.set_filterbank_channel_count(filterbank_channel_count); + mfcc.set_dct_coefficient_count(dct_coefficient_count); + mfcc.Initialize(spectrogram_output[0].size(), sample_rate); + + for(size_t i = 0; i < spectrogram_output.size(); ++i) + { + vector output; + mfcc.Compute(spectrogram_output[i], &output); + mfcc_output[i] = output; + } + + return mfcc_output; +} + +extern "C" { + /** + * @brief Calculate Mel Frequency Cepstral Coefficents (MFCCs) from audio PCM. + * @param *pcm Audio PCM value pointer + * @param *len Buffer length pointer + * @param sample_rate Aduio Sample Rate + * @param window_size sample_rate * window_size_ms / 1000 + * @param window_stride sample_rate * window_stride_ms / 1000 + * @param upper_frequency_limit Default 4000 + * @param lower_frequency_limit Default 20 + * @param filterbank_channel_count Default 40 + * @param dct_coefficient_count MFCC feature extractor should return + * + * @return + * MFCCs pointer + */ + double* EMSCRIPTEN_KEEPALIVE tf_mfccs(double *pcm, + int *len, + int sample_rate, + int window_size, + int window_stride, + int upper_frequency_limit, + int lower_frequency_limit, + int filterbank_channel_count, + int dct_coefficient_count) + { + vector v_pcm(pcm, pcm + *len); + + vector > spectrogram_output = get_audio_spectrogram(v_pcm, window_size, window_stride); + + vector > mfcc_output = get_audio_mfcc(spectrogram_output, sample_rate, + upper_frequency_limit, lower_frequency_limit, + filterbank_channel_count, dct_coefficient_count); + + *len = mfcc_output.size() * mfcc_output[0].size(); + double mfccs[*len]; + int mfccs_index = 0; + + for(int i = 0; i < mfcc_output.size(); ++i) + { + for(int j = 0; j < mfcc_output[i].size(); ++j) + { + mfccs[mfccs_index++] = mfcc_output[i][j]; + } + } + + return mfccs; + } +} diff --git a/src/mfcc.cc b/src/mfcc.cc new file mode 100644 index 0000000..98566af --- /dev/null +++ b/src/mfcc.cc @@ -0,0 +1,67 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include + +#include "mfcc.h" +// #include "logging.h" + +using std::cout; + +namespace tensorflow { + +const double kDefaultUpperFrequencyLimit = 4000; +const double kDefaultLowerFrequencyLimit = 20; +const double kFilterbankFloor = 1e-12; +const int kDefaultFilterbankChannelCount = 40; +const int kDefaultDCTCoefficientCount = 13; + +Mfcc::Mfcc() + : initialized_(false), + lower_frequency_limit_(kDefaultLowerFrequencyLimit), + upper_frequency_limit_(kDefaultUpperFrequencyLimit), + filterbank_channel_count_(kDefaultFilterbankChannelCount), + dct_coefficient_count_(kDefaultDCTCoefficientCount) {} + +bool Mfcc::Initialize(int input_length, double input_sample_rate) { + bool initialized = mel_filterbank_.Initialize( + input_length, input_sample_rate, filterbank_channel_count_, + lower_frequency_limit_, upper_frequency_limit_); + initialized &= + dct_.Initialize(filterbank_channel_count_, dct_coefficient_count_); + initialized_ = initialized; + return initialized; +} + +void Mfcc::Compute(const std::vector& spectrogram_frame, + std::vector* output) const { + if (!initialized_) { + cout << "Mfcc not initialized."; + return; + } + std::vector working; + mel_filterbank_.Compute(spectrogram_frame, &working); + for (int i = 0; i < working.size(); ++i) { + double val = working[i]; + if (val < kFilterbankFloor) { + val = kFilterbankFloor; + } + working[i] = log(val); + } + dct_.Compute(working, output); +} + +} // namespace tensorflow diff --git a/src/mfcc.h b/src/mfcc.h new file mode 100644 index 0000000..3f62a6e --- /dev/null +++ b/src/mfcc.h @@ -0,0 +1,70 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Basic class for computing MFCCs from spectrogram slices. + +#ifndef TENSORFLOW_CORE_KERNELS_MFCC_H_ +#define TENSORFLOW_CORE_KERNELS_MFCC_H_ + +#include +#include + +#include "mfcc_dct.h" +#include "mfcc_mel_filterbank.h" + +namespace tensorflow { + +class Mfcc { + public: + Mfcc(); + bool Initialize(int input_length, double input_sample_rate); + + // Input is a single squared-magnitude spectrogram frame. The input spectrum + // is converted to linear magnitude and weighted into bands using a + // triangular mel filterbank, and a discrete cosine transform (DCT) of the + // values is taken. Output is populated with the lowest dct_coefficient_count + // of these values. + void Compute(const std::vector& spectrogram_frame, + std::vector* output) const; + + void set_upper_frequency_limit(double upper_frequency_limit) { + upper_frequency_limit_ = upper_frequency_limit; + } + + void set_lower_frequency_limit(double lower_frequency_limit) { + lower_frequency_limit_ = lower_frequency_limit; + } + + void set_filterbank_channel_count(int filterbank_channel_count) { + filterbank_channel_count_ = filterbank_channel_count; + } + + void set_dct_coefficient_count(int dct_coefficient_count) { + dct_coefficient_count_ = dct_coefficient_count; + } + + private: + MfccMelFilterbank mel_filterbank_; + MfccDct dct_; + bool initialized_; + double lower_frequency_limit_; + double upper_frequency_limit_; + int filterbank_channel_count_; + int dct_coefficient_count_; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_KERNELS_MFCC_H_ diff --git a/src/mfcc_dct.cc b/src/mfcc_dct.cc new file mode 100644 index 0000000..ba83930 --- /dev/null +++ b/src/mfcc_dct.cc @@ -0,0 +1,83 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include + +#include "mfcc_dct.h" +// #include "logging.h" +using std::cout; + +namespace tensorflow { + +MfccDct::MfccDct() : initialized_(false) {} + +bool MfccDct::Initialize(int input_length, int coefficient_count) { + coefficient_count_ = coefficient_count; + input_length_ = input_length; + + if (coefficient_count_ < 1) { + cout << "Coefficient count must be positive."; + return false; + } + + if (input_length < 1) { + cout << "Input length must be positive."; + return false; + } + + if (coefficient_count_ > input_length_) { + cout << "Coefficient count must be less than or equal to " + << "input length."; + return false; + } + + cosines_.resize(coefficient_count_); + double fnorm = sqrt(2.0 / input_length_); + // Some platforms don't have M_PI, so define a local constant here. + const double pi = 3.1415926; + double arg = pi / input_length_; + for (int i = 0; i < coefficient_count_; ++i) { + cosines_[i].resize(input_length_); + for (int j = 0; j < input_length_; ++j) { + cosines_[i][j] = fnorm * cos(i * arg * (j + 0.5)); + } + } + initialized_ = true; + return true; +} + +void MfccDct::Compute(const std::vector &input, + std::vector *output) const { + if (!initialized_) { + cout << "DCT not initialized."; + return; + } + + output->resize(coefficient_count_); + int length = input.size(); + if (length > input_length_) { + length = input_length_; + } + + for (int i = 0; i < coefficient_count_; ++i) { + double sum = 0.0; + for (int j = 0; j < length; ++j) { + sum += cosines_[i][j] * input[j]; + } + (*output)[i] = sum; + } +} + +} // namespace tensorflow diff --git a/src/mfcc_dct.h b/src/mfcc_dct.h new file mode 100644 index 0000000..b1ab300 --- /dev/null +++ b/src/mfcc_dct.h @@ -0,0 +1,42 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Basic minimal DCT class for MFCC speech processing. + +#ifndef TENSORFLOW_CORE_KERNELS_MFCC_DCT_H_ +#define TENSORFLOW_CORE_KERNELS_MFCC_DCT_H_ + +#include +#include + +namespace tensorflow { + +class MfccDct { + public: + MfccDct(); + bool Initialize(int input_length, int coefficient_count); + void Compute(const std::vector& input, + std::vector* output) const; + + private: + bool initialized_; + int coefficient_count_; + int input_length_; + std::vector > cosines_; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_KERNELS_MFCC_DCT_H_ diff --git a/src/mfcc_mel_filterbank.cc b/src/mfcc_mel_filterbank.cc new file mode 100644 index 0000000..0a98db8 --- /dev/null +++ b/src/mfcc_mel_filterbank.cc @@ -0,0 +1,205 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// This code resamples the FFT bins, and smooths then with triangle-shaped +// weights to create a mel-frequency filter bank. For filter i centered at f_i, +// there is a triangular weighting of the FFT bins that extends from +// filter f_i-1 (with a value of zero at the left edge of the triangle) to f_i +// (where the filter value is 1) to f_i+1 (where the filter values returns to +// zero). + +// Note: this code fails if you ask for too many channels. The algorithm used +// here assumes that each FFT bin contributes to at most two channels: the +// right side of a triangle for channel i, and the left side of the triangle +// for channel i+1. If you ask for so many channels that some of the +// resulting mel triangle filters are smaller than a single FFT bin, these +// channels may end up with no contributing FFT bins. The resulting mel +// spectrum output will have some channels that are always zero. + +#include +#include + +#include "mfcc_mel_filterbank.h" + +// #include "logging.h" +using std::cout; +using std::endl; + +namespace tensorflow { + +MfccMelFilterbank::MfccMelFilterbank() : initialized_(false) {} + +bool MfccMelFilterbank::Initialize(int input_length, double input_sample_rate, + int output_channel_count, + double lower_frequency_limit, + double upper_frequency_limit) { + num_channels_ = output_channel_count; + sample_rate_ = input_sample_rate; + input_length_ = input_length; + + if (num_channels_ < 1) { + cout << "Number of filterbank channels must be positive." << endl; + return false; + } + + if (sample_rate_ <= 0) { + cout << "Sample rate must be positive." << endl; + return false; + } + + if (input_length < 2) { + cout << "Input length must greater than 1." << endl; + return false; + } + + if (lower_frequency_limit < 0) { + cout << "Lower frequency limit must be nonnegative." << endl; + return false; + } + + if (upper_frequency_limit <= lower_frequency_limit) { + cout << "Upper frequency limit must be greater than " + << "lower frequency limit." << endl; + return false; + } + + // An extra center frequency is computed at the top to get the upper + // limit on the high side of the final triangular filter. + center_frequencies_.resize(num_channels_ + 1); + const double mel_low = FreqToMel(lower_frequency_limit); + const double mel_hi = FreqToMel(upper_frequency_limit); + const double mel_span = mel_hi - mel_low; + const double mel_spacing = mel_span / static_cast(num_channels_ + 1); + for (int i = 0; i < num_channels_ + 1; ++i) { + center_frequencies_[i] = mel_low + (mel_spacing * (i + 1)); + } + + // Always exclude DC; emulate HTK. + const double hz_per_sbin = + 0.5 * sample_rate_ / static_cast(input_length_ - 1); + start_index_ = static_cast(1.5 + (lower_frequency_limit / hz_per_sbin)); + end_index_ = static_cast(upper_frequency_limit / hz_per_sbin); + + // Maps the input spectrum bin indices to filter bank channels/indices. For + // each FFT bin, band_mapper tells us which channel this bin contributes to + // on the right side of the triangle. Thus this bin also contributes to the + // left side of the next channel's triangle response. + band_mapper_.resize(input_length_); + int channel = 0; + for (int i = 0; i < input_length_; ++i) { + double melf = FreqToMel(i * hz_per_sbin); + if ((i < start_index_) || (i > end_index_)) { + band_mapper_[i] = -2; // Indicate an unused Fourier coefficient. + } else { + while ((center_frequencies_[channel] < melf) && + (channel < num_channels_)) { + ++channel; + } + band_mapper_[i] = channel - 1; // Can be == -1 + } + } + + // Create the weighting functions to taper the band edges. The contribution + // of any one FFT bin is based on its distance along the continuum between two + // mel-channel center frequencies. This bin contributes weights_[i] to the + // current channel and 1-weights_[i] to the next channel. + weights_.resize(input_length_); + for (int i = 0; i < input_length_; ++i) { + channel = band_mapper_[i]; + if ((i < start_index_) || (i > end_index_)) { + weights_[i] = 0.0; + } else { + if (channel >= 0) { + weights_[i] = + (center_frequencies_[channel + 1] - FreqToMel(i * hz_per_sbin)) / + (center_frequencies_[channel + 1] - center_frequencies_[channel]); + } else { + weights_[i] = (center_frequencies_[0] - FreqToMel(i * hz_per_sbin)) / + (center_frequencies_[0] - mel_low); + } + } + } + // Check the sum of FFT bin weights for every mel band to identify + // situations where the mel bands are so narrow that they don't get + // significant weight on enough (or any) FFT bins -- i.e., too many + // mel bands have been requested for the given FFT size. + std::vector bad_channels; + for (int c = 0; c < num_channels_; ++c) { + float band_weights_sum = 0.0; + for (int i = 0; i < input_length_; ++i) { + if (band_mapper_[i] == c - 1) { + band_weights_sum += (1.0 - weights_[i]); + } else if (band_mapper_[i] == c) { + band_weights_sum += weights_[i]; + } + } + // The lowest mel channels have the fewest FFT bins and the lowest + // weights sum. But given that the target gain at the center frequency + // is 1.0, if the total sum of weights is 0.5, we're in bad shape. + if (band_weights_sum < 0.5) { + bad_channels.push_back(c); + } + } + if (!bad_channels.empty()) { + cout << "Missing " << bad_channels.size() << " bands " + << " starting at " << bad_channels[0] + << " in mel-frequency design. " + << "Perhaps too many channels or " + << "not enough frequency resolution in spectrum. (" + << "input_length: " << input_length + << " input_sample_rate: " << input_sample_rate + << " output_channel_count: " << output_channel_count + << " lower_frequency_limit: " << lower_frequency_limit + << " upper_frequency_limit: " << upper_frequency_limit << endl; + } + initialized_ = true; + return true; +} + +// Compute the mel spectrum from the squared-magnitude FFT input by taking the +// square root, then summing FFT magnitudes under triangular integration windows +// whose widths increase with frequency. +void MfccMelFilterbank::Compute(const std::vector &input, + std::vector *output) const { + if (!initialized_) { + cout << "Mel Filterbank not initialized." << endl; + return; + } + + if (input.size() <= end_index_) { + cout << "Input too short to compute filterbank" << endl; + return; + } + + // Ensure output is right length and reset all values. + output->assign(num_channels_, 0.0); + + for (int i = start_index_; i <= end_index_; i++) { // For each FFT bin + double spec_val = sqrt(input[i]); + double weighted = spec_val * weights_[i]; + int channel = band_mapper_[i]; + if (channel >= 0) + (*output)[channel] += weighted; // Right side of triangle, downward slope + channel++; + if (channel < num_channels_) + (*output)[channel] += spec_val - weighted; // Left side of triangle + } +} + +double MfccMelFilterbank::FreqToMel(double freq) const { + return 1127.0 * log1p(freq / 700.0); +} + +} // namespace tensorflow diff --git a/src/mfcc_mel_filterbank.h b/src/mfcc_mel_filterbank.h new file mode 100644 index 0000000..727d8c8 --- /dev/null +++ b/src/mfcc_mel_filterbank.h @@ -0,0 +1,63 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Basic class for applying a mel-scale mapping to a power spectrum. + +#ifndef TENSORFLOW_CORE_KERNELS_MFCC_MEL_FILTERBANK_H_ +#define TENSORFLOW_CORE_KERNELS_MFCC_MEL_FILTERBANK_H_ + +#include +#include + +namespace tensorflow { + +class MfccMelFilterbank { + public: + MfccMelFilterbank(); + bool Initialize(int input_length, // Number of unique FFT bins fftsize/2+1. + double input_sample_rate, int output_channel_count, + double lower_frequency_limit, double upper_frequency_limit); + + // Takes a squared-magnitude spectrogram slice as input, computes a + // triangular-mel-weighted linear-magnitude filterbank, and places the result + // in output. + void Compute(const std::vector& input, + std::vector* output) const; + + private: + double FreqToMel(double freq) const; + bool initialized_; + int num_channels_; + double sample_rate_; + int input_length_; + std::vector center_frequencies_; // In mel, for each mel channel. + + // Each FFT bin b contributes to two triangular mel channels, with + // proportion weights_[b] going into mel channel band_mapper_[b], and + // proportion (1 - weights_[b]) going into channel band_mapper_[b] + 1. + // Thus, weights_ contains the weighting applied to each FFT bin for the + // upper-half of the triangular band. + std::vector weights_; // Right-side weight for this fft bin. + + // FFT bin i contributes to the upper side of mel channel band_mapper_[i] + std::vector band_mapper_; + int start_index_; // Lowest FFT bin used to calculate mel spectrum. + int end_index_; // Highest FFT bin used to calculate mel spectrum. + +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_KERNELS_MFCC_MEL_FILTERBANK_H_ diff --git a/src/platform.h b/src/platform.h new file mode 100644 index 0000000..3e08147 --- /dev/null +++ b/src/platform.h @@ -0,0 +1,57 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_PLATFORM_PLATFORM_DEFINE_H_ +#define TENSORFLOW_PLATFORM_PLATFORM_DEFINE_H_ + +// Set one PLATFORM_* macro and set IS_MOBILE_PLATFORM if the platform is for +// mobile. + +#if !defined(PLATFORM_POSIX) && !defined(PLATFORM_GOOGLE) && \ + !defined(PLATFORM_POSIX_ANDROID) && !defined(PLATFORM_GOOGLE_ANDROID) && \ + !defined(PLATFORM_WINDOWS) + +// Choose which platform we are on. +#if defined(ANDROID) || defined(__ANDROID__) +#define PLATFORM_POSIX_ANDROID +#define IS_MOBILE_PLATFORM + +#elif defined(_WIN32) +#define PLATFORM_WINDOWS + +#elif defined(__arm__) || defined(__EMSCRIPTEN__) +#define PLATFORM_POSIX + +// Require an outside macro to tell us if we're building for Raspberry Pi or +// another ARM device that's not a mobile platform. +#if !defined(RASPBERRY_PI) && !defined(ARM_NON_MOBILE) +#define IS_MOBILE_PLATFORM +#endif // !defined(RASPBERRY_PI) && !defined(ARM_NON_MOBILE) + +#else +// If no platform specified, use: +#define PLATFORM_POSIX + +#endif +#endif + +// Look for both gcc/clang and Visual Studio macros indicating we're compiling +// for an x86 device. +#if defined(__x86_64__) || defined(__amd64__) || defined(_M_IX86) || \ + defined(_M_X64) +#define PLATFORM_IS_X86 +#endif + +#endif // TENSORFLOW_PLATFORM_PLATFORM_DEFINE_H_ diff --git a/src/spectrogram.cc b/src/spectrogram.cc new file mode 100644 index 0000000..f01ea12 --- /dev/null +++ b/src/spectrogram.cc @@ -0,0 +1,213 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include "spectrogram.h" +// #include "fft.h" +#include "bits.h" + +#include "fftsg.c" + +using std::complex; +using std::cout; +using std::endl; + +namespace tensorflow { + +namespace { +// Returns the default Hann window function for the spectrogram. +void GetPeriodicHann(int window_length, std::vector* window) { + // Some platforms don't have M_PI, so define a local constant here. + const double pi = std::atan(1) * 4; + window->resize(window_length); + for (int i = 0; i < window_length; ++i) { + (*window)[i] = 0.5 - 0.5 * cos((2 * pi * i) / window_length); + } +} +} // namespace + +bool Spectrogram::Initialize(int window_length, int step_length) { + std::vector window; + GetPeriodicHann(window_length, &window); + return Initialize(window, step_length); +} + +bool Spectrogram::Initialize(const std::vector& window, int step_length) { + window_length_ = window.size(); + window_ = window; // Copy window. + if (window_length_ < 2) { + cout << "Window length too short." << endl; + initialized_ = false; + return false; + } + + step_length_ = step_length; + if (step_length_ < 1) { + cout << "Step length must be positive." << endl; + initialized_ = false; + return false; + } + + fft_length_ = NextPowerOfTwo(window_length_); + // CHECK(fft_length_ >= window_length_); + output_frequency_channels_ = 1 + fft_length_ / 2; + + // Allocate 2 more than what rdft needs, so we can rationalize the layout. + fft_input_output_.assign(fft_length_ + 2, 0.0); + + int half_fft_length = fft_length_ / 2; + fft_double_working_area_.assign(half_fft_length, 0.0); + fft_integer_working_area_.assign(2 + static_cast(sqrt(half_fft_length)), + 0); + // Set flag element to ensure that the working areas are initialized + // on the first call to cdft. It's redundant given the assign above, + // but keep it as a reminder. + fft_integer_working_area_[0] = 0; + input_queue_.clear(); + samples_to_next_step_ = window_length_; + initialized_ = true; + return true; +} + +template +bool Spectrogram::ComputeComplexSpectrogram( + const std::vector& input, + std::vector > >* output) { + if (!initialized_) { + cout << "ComputeComplexSpectrogram() called before successful call " + << "to Initialize()."; + return false; + } + // CHECK(output); + output->clear(); + int input_start = 0; + while (GetNextWindowOfSamples(input, &input_start)) { + // DCHECK_EQ(input_queue_.size(), window_length_); + ProcessCoreFFT(); // Processes input_queue_ to fft_input_output_. + // Add a new slice vector onto the output, to save new result to. + output->resize(output->size() + 1); + // Get a reference to the newly added slice to fill in. + std::vector >& spectrogram_slice = output->back(); + spectrogram_slice.resize(output_frequency_channels_); + for (int i = 0; i < output_frequency_channels_; ++i) { + // This will convert double to float if it needs to. + spectrogram_slice[i] = complex( + fft_input_output_[2 * i], fft_input_output_[2 * i + 1]); + } + } + return true; +} +// Instantiate it four ways: +template bool Spectrogram::ComputeComplexSpectrogram( + const std::vector& input, std::vector > >*); +template bool Spectrogram::ComputeComplexSpectrogram( + const std::vector& input, + std::vector > >*); +template bool Spectrogram::ComputeComplexSpectrogram( + const std::vector& input, + std::vector > >*); +template bool Spectrogram::ComputeComplexSpectrogram( + const std::vector& input, + std::vector > >*); + +template +bool Spectrogram::ComputeSquaredMagnitudeSpectrogram( + const std::vector& input, + std::vector >* output) { + if (!initialized_) { + cout << "ComputeSquaredMagnitudeSpectrogram() called before " + << "successful call to Initialize()."; + return false; + } + // CHECK(output); + output->clear(); + int input_start = 0; + while (GetNextWindowOfSamples(input, &input_start)) { + // DCHECK_EQ(input_queue_.size(), window_length_); + ProcessCoreFFT(); // Processes input_queue_ to fft_input_output_. + // Add a new slice vector onto the output, to save new result to. + output->resize(output->size() + 1); + // Get a reference to the newly added slice to fill in. + std::vector& spectrogram_slice = output->back(); + spectrogram_slice.resize(output_frequency_channels_); + for (int i = 0; i < output_frequency_channels_; ++i) { + // Similar to the Complex case, except storing the norm. + // But the norm function is known to be a performance killer, + // so do it this way with explicit real and imagninary temps. + const double re = fft_input_output_[2 * i]; + const double im = fft_input_output_[2 * i + 1]; + // Which finally converts double to float if it needs to. + spectrogram_slice[i] = re * re + im * im; + } + } + return true; +} +// Instantiate it four ways: +template bool Spectrogram::ComputeSquaredMagnitudeSpectrogram( + const std::vector& input, std::vector >*); +template bool Spectrogram::ComputeSquaredMagnitudeSpectrogram( + const std::vector& input, std::vector >*); +template bool Spectrogram::ComputeSquaredMagnitudeSpectrogram( + const std::vector& input, std::vector >*); +template bool Spectrogram::ComputeSquaredMagnitudeSpectrogram( + const std::vector& input, std::vector >*); + +// Return true if a full window of samples is prepared; manage the queue. +template +bool Spectrogram::GetNextWindowOfSamples(const std::vector& input, int* input_start) { + auto input_it = input.begin() + *input_start; + // input_it += *input_start; + int input_remaining = input.end() - input_it; + if (samples_to_next_step_ > input_remaining) { + // Copy in as many samples are left and return false, no full window. + input_queue_.insert(input_queue_.end(), input_it, input.end()); + *input_start += input_remaining; // Increases it to input.size(). + samples_to_next_step_ -= input_remaining; + return false; // Not enough for a full window. + } else { + // Copy just enough into queue to make a new window, then trim the + // front off the queue to make it window-sized. + input_queue_.insert(input_queue_.end(), input_it, input_it + samples_to_next_step_); + *input_start += samples_to_next_step_; + input_queue_.erase( + input_queue_.begin(), + input_queue_.begin() + input_queue_.size() - window_length_); + // DCHECK_EQ(window_length_, input_queue_.size()); + samples_to_next_step_ = step_length_; // Be ready for next time. + return true; // Yes, input_queue_ now contains exactly a window-full. + } +} + +void Spectrogram::ProcessCoreFFT() { + for (int j = 0; j < window_length_; ++j) { + fft_input_output_[j] = input_queue_[j] * window_[j]; + } + // Zero-pad the rest of the input buffer. + for (int j = window_length_; j < fft_length_; ++j) { + fft_input_output_[j] = 0.0; + } + const int kForwardFFT = 1; // 1 means forward; -1 reverse. + // This real FFT is a fair amount faster than using cdft here. + rdft(fft_length_, kForwardFFT, &fft_input_output_[0], + &fft_integer_working_area_[0], &fft_double_working_area_[0]); + // Make rdft result look like cdft result; + // unpack the last real value from the first position's imag slot. + fft_input_output_[fft_length_] = fft_input_output_[1]; + fft_input_output_[fft_length_ + 1] = 0; + fft_input_output_[1] = 0; +} + +} // namespace tensorflow diff --git a/src/spectrogram.h b/src/spectrogram.h new file mode 100644 index 0000000..f4fedba --- /dev/null +++ b/src/spectrogram.h @@ -0,0 +1,108 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Class for generating spectrogram slices from a waveform. +// Initialize() should be called before calls to other functions. Once +// Initialize() has been called and returned true, The Compute*() functions can +// be called repeatedly with sequential input data (ie. the first element of the +// next input vector directly follows the last element of the previous input +// vector). Whenever enough audio samples are buffered to produce a +// new frame, it will be placed in output. Output is cleared on each +// call to Compute*(). This class is thread-unsafe, and should only be +// called from one thread at a time. +// With the default parameters, the output of this class should be very +// close to the results of the following MATLAB code: +// overlap_samples = window_length_samples - step_samples; +// window = hann(window_length_samples, 'periodic'); +// S = abs(spectrogram(audio, window, overlap_samples)).^2; + +#ifndef TENSORFLOW_CORE_KERNELS_SPECTROGRAM_H_ +#define TENSORFLOW_CORE_KERNELS_SPECTROGRAM_H_ + +#include +#include +#include +#include + +namespace tensorflow { + +class Spectrogram { + public: + Spectrogram() : initialized_(false) {} + ~Spectrogram() {} + + // Initializes the class with a given window length and step length + // (both in samples). Internally a Hann window is used as the window + // function. Returns true on success, after which calls to Process() + // are possible. window_length must be greater than 1 and step + // length must be greater than 0. + bool Initialize(int window_length, int step_length); + + // Initialize with an explicit window instead of a length. + bool Initialize(const std::vector& window, int step_length); + + // Processes an arbitrary amount of audio data (contained in input) + // to yield complex spectrogram frames. After a successful call to + // Initialize(), Process() may be called repeatedly with new input data + // each time. The audio input is buffered internally, and the output + // vector is populated with as many temporally-ordered spectral slices + // as it is possible to generate from the input. The output is cleared + // on each call before the new frames (if any) are added. + // + // The template parameters can be float or double. + template + bool ComputeComplexSpectrogram( + const std::vector& input, + std::vector > >* output); + + // This function works as the one above, but returns the power + // (the L2 norm, or the squared magnitude) of each complex value. + template + bool ComputeSquaredMagnitudeSpectrogram( + const std::vector& input, + std::vector >* output); + + // Return reference to the window function used internally. + const std::vector& GetWindow() const { return window_; } + + // Return the number of frequency channels in the spectrogram. + int output_frequency_channels() const { return output_frequency_channels_; } + + private: + template + bool GetNextWindowOfSamples(const std::vector& input, + int* input_start); + void ProcessCoreFFT(); + + int fft_length_; + int output_frequency_channels_; + int window_length_; + int step_length_; + bool initialized_; + int samples_to_next_step_; + + std::vector window_; + std::vector fft_input_output_; + std::deque input_queue_; + + // Working data areas for the FFT routines. + std::vector fft_integer_working_area_; + std::vector fft_double_working_area_; + +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_KERNELS_SPECTROGRAM_H_ diff --git a/src/types.h b/src/types.h new file mode 100644 index 0000000..5b3dfb7 --- /dev/null +++ b/src/types.h @@ -0,0 +1,59 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_PLATFORM_TYPES_H_ +#define TENSORFLOW_CORE_PLATFORM_TYPES_H_ + +#include +#include "platform.h" + + +// Include appropriate platform-dependent implementations +#if defined(PLATFORM_GOOGLE) || defined(GOOGLE_INTEGRAL_TYPES) +#include "integral_types.h" +#elif defined(PLATFORM_WINDOWS) +#include "integral_types.h" +#elif defined(PLATFORM_POSIX) || defined(PLATFORM_POSIX_ANDROID) || \ + defined(PLATFORM_GOOGLE_ANDROID) +#include "integral_types.h" +#else +#error Define the appropriate PLATFORM_ macro for this platform +#endif + + +namespace tensorflow { + +// Alias tensorflow::string to std::string. +using std::string; + +static const uint8 kuint8max = ((uint8)0xFF); +static const uint16 kuint16max = ((uint16)0xFFFF); +static const uint32 kuint32max = ((uint32)0xFFFFFFFF); +static const uint64 kuint64max = ((uint64)0xFFFFFFFFFFFFFFFFull); +static const int8 kint8min = ((int8)~0x7F); +static const int8 kint8max = ((int8)0x7F); +static const int16 kint16min = ((int16)~0x7FFF); +static const int16 kint16max = ((int16)0x7FFF); +static const int32 kint32min = ((int32)~0x7FFFFFFF); +static const int32 kint32max = ((int32)0x7FFFFFFF); +static const int64 kint64min = ((int64)~0x7FFFFFFFFFFFFFFFll); +static const int64 kint64max = ((int64)0x7FFFFFFFFFFFFFFFll); + +// A typedef for a uint64 used as a short fingerprint. +typedef uint64 Fprint; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_PLATFORM_TYPES_H_